Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[base/kvm] Add KVM enabled base container #949

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
13 changes: 12 additions & 1 deletion base-containers/base/bin/_run_student_intern
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ logger = setup_logger()
# Check the runtimes
runtime = sys.argv[1]
parent_runtime = sys.argv[2]
kvm_enabled = sys.argv[3] == "True"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd document the arguments on both sides: here, and at the call site in _docker_interface.py (to avoid implementing an otherwise useless argument parser just for the sake of being self-documenting).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The argument management will be replaced by an argument parser.

shared_kernel, both_same_kernel = check_runtimes(runtime, parent_runtime)
# shared_kernel: boolean, True when this student_container is running on docker runtime. False when running on kata runtime.
# both_same_kernel: boolean, True when this student_container and its parent grading_container are both running on docker runtimes.
Expand Down Expand Up @@ -56,6 +57,16 @@ for name, (spath, append) in system_files.items():

logger.info("student container started and received initial command")

# If the current container requires KVM passthrough for interactive session, launch KVM start script as root.
if kvm_enabled and start_cmd["ssh"]:
kvm_start_path = os.environ.get('KVM_START_PATH')
if kvm_start_path is None:
logger.warning('KVM start script not found.')
exit(250)
else:
subprocess.Popen(shlex.split(kvm_start_path))
logger.info('KVM launched')

# Start the process
os.chdir(start_cmd["working_dir"])
set_limits = lambda: set_limits_user(user) # To know if the command should be executed as root or worker
Expand Down Expand Up @@ -93,7 +104,7 @@ scripts_isolation(True) # Setup script finished, make the scripts directory iso
# Handle SSH
if start_cmd["ssh"]:
logger.info("student container is starting ssh session")
retval = handle_ssh_session(student_container_id, both_same_kernel, event_loop, socket_unix, container_stdout, user)
retval = handle_ssh_session(student_container_id, both_same_kernel, event_loop, socket_unix, container_stdout, user, kvm_enabled)
logger.info("student container finished ssh session")

# Run teardown script
Expand Down
13 changes: 9 additions & 4 deletions base-containers/base/inginious_container_api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def execute_process(args, stdin_string="", internal_command=False, user="worker"
return stdout.read(), stderr.read()


def start_ssh_server(ssh_user):
def start_ssh_server(ssh_user, kvm_enabled: bool = False):
# Generate password
password, _ = execute_process(["/usr/bin/openssl", "rand", "-base64", "10"], internal_command=True, user=ssh_user)
password = password.decode('utf8').strip()
Expand All @@ -70,13 +70,18 @@ def start_ssh_server(ssh_user):
os.unlink("/run/nologin")

permit_root_login = "yes" if ssh_user == "root" else "no"
shell_forward = '-c "telnet localhost 2223"' if kvm_enabled else ''

# Wait for telnet session
if kvm_enabled:
while not os.path.exists('/task/student/kvm/.telnet'): pass

# Start the ssh server
execute_process(["/usr/sbin/sshd",
"-p", "22",
"-o", "PermitRootLogin={}".format(permit_root_login),
"-o", "PasswordAuthentication=yes", "-o", "StrictModes=no",
"-o", "ForceCommand=echo LOGIN: Good luck !; script -q .ssh_logs; cp .ssh_logs /task/student/.ssh_logs; echo LOGOUT: Good bye!",
"-o", f"ForceCommand=echo LOGIN: Good luck !; script {shell_forward} -q .ssh_logs; cp .ssh_logs /task/student/.ssh_logs; echo LOGOUT: Good bye!",
"-o", "AllowUsers={}".format(ssh_user)], internal_command=True, user=ssh_user)
return ssh_user, password
#When logging in, student is in a special interactive shell where everything is logged into a file.
Expand Down Expand Up @@ -179,9 +184,9 @@ def handle_signals(concerned_subprocess, com_socket):
sys.exit()


def handle_ssh_session(container_id, both_dockers, event_loop, socket_unix, container_stdout, user):
def handle_ssh_session(container_id, both_dockers, event_loop, socket_unix, container_stdout, user, kvm_enabled: bool = False):
""" Start the ssh server and send identification information """
ssh_user, password = start_ssh_server(user)
ssh_user, password = start_ssh_server(user, kvm_enabled)
if both_dockers:
# Send ssh information to the grading container
message = msgpack.dumps({"type": "ssh_student", "ssh_user": ssh_user, "password": password}) # constant size
Expand Down
102 changes: 102 additions & 0 deletions base-containers/kvm/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Build arguments declared before the first FROM so they can be used by the
# FROM line of the final stage (${REGISTRY}/inginious/env-base:${VERSION}).
ARG VERSION=latest
ARG REGISTRY

# Builder stage: produces the QEMU RPMs and the GNU telnet/telnetd binaries
# that the final stage copies with COPY --from=builder.
#
# Rocky Linux does not enable 9P virtfs in the shipped QEMU builds, hence we have to rebuild our own version.
# Inspired by https://github.com/acudovs/qemu-kvm-virtfs/blob/master/rpmbuild/build
FROM rockylinux:8 as builder

# Enable additional repos (powertools)
RUN dnf -y install 'dnf-command(config-manager)' &&\
dnf config-manager --set-enabled powertools

# Install dependencies to build QEMU
RUN yum -y update && yum -y install \
glusterfs-api-devel \
glusterfs-devel \
iasl \
libcacard-devel \
libpmem-devel \
nss-devel \
pkgconfig \
spice-protocol \
spice-server-devel \
usbredir-devel \
yum-utils \
'pkgconfig(epoxy)' \
'pkgconfig(gbm)' \
'pkgconfig(libdrm)' \
git

# Apply patches and build QEMU
WORKDIR /opt
# Install the build dependencies declared by the qemu-kvm package
RUN yum-builddep -y qemu-kvm
# Disabled alternative: fetch whatever source RPM the repositories currently provide.
#RUN yumdownloader --source qemu-kvm
# Download one specific qemu-kvm 6.2.0 source RPM and unpack it; the sed/patch
# steps below edit the resulting /root/rpmbuild/SPECS/qemu-kvm.spec.
RUN wget "http://download.rockylinux.org/pub/rocky/8/AppStream/source/tree/Packages/q/qemu-kvm-6.2.0-32.module%2Bel8.8.0%2B1279%2B230c2115.src.rpm"
RUN rpm -Uvh qemu-kvm-*.src.rpm
# Disabled alternative: take SOURCES and SPECS from the Rocky Linux git repository instead.
#RUN git clone https://git.rockylinux.org/staging/rpms/qemu-kvm.git &&\
# git -C qemu-kvm checkout r8s-stream-rhel &&\
# if [[ ! -d /root/rpmbuild ]]; then mkdir /root/rpmbuild; fi &&\
# mv qemu-kvm/{SOURCES,SPECS} /root/rpmbuild/
# Turn the virtfs and virtiofsd configure switches from --disable-* to --enable-* in the spec
RUN sed -i -e 's/--disable-virtfs/--enable-virtfs/' \
-e 's/--disable-virtiofsd/--enable-virtiofsd/g' \
/root/rpmbuild/SPECS/qemu-kvm.spec
# Further spec edits, one per -e expression:
#  1. add the virtfs-proxy-helper binary and its man page to the "%files -n qemu-kvm-common" list;
#  2. inside the "%if %{rhev}" branch, rename the package suffix from -ev to -virtfs;
#  3. append Provides/Obsoletes lines for the "-ev" name after "%define rhel_rhev_conflicts()";
#  4. drop the command that deletes the virtfs-proxy-helper man page;
#  5. move virtfs-proxy-helper from %{_libexecdir} to %{_bindir} instead of deleting it.
RUN sed -i -e '/^%files -n qemu-kvm-common/,/^$/s/^$/%{_bindir}\/virtfs-proxy-helper\n%{_mandir}\/man1\/virtfs-proxy-helper.1.gz\n/' \
-e '/^%if %{rhev}$/,/^%else$/s/pkgsuffix -ev/pkgsuffix -virtfs/' \
-e '/%define rhel_rhev_conflicts()/ a Provides: %1-ev = %{epoch}:%{version}-%{release} \\\nObsoletes: %1-ev < %{obsoletes_version} \\' \
-e 's/rm -rf ${RPM_BUILD_ROOT}%{_mandir}\/man1\/virtfs-proxy-helper\*//g' \
-e 's/rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}\/virtfs-proxy-helper/mv ${RPM_BUILD_ROOT}%{_libexecdir}\/virtfs-proxy-helper ${RPM_BUILD_ROOT}%{_bindir}\/virtfs-proxy-helper/g' \
/root/rpmbuild/SPECS/qemu-kvm.spec
# virtio-9p-pci.patch adds CONFIG_VIRTIO_9P=y to the x86_64 device list;
# qemu-kvm.spec.patch registers it in the spec as Patch258.
COPY virtio-9p-pci.patch /root/rpmbuild/SOURCES/
COPY qemu-kvm.spec.patch /tmp
RUN patch /root/rpmbuild/SPECS/qemu-kvm.spec /tmp/qemu-kvm.spec.patch
# Disabled alternative: repackage the sources into a qemu-6.2.0 tarball before building.
#RUN mkdir /tmp/qemu-6.2.0 &&\
# cp /root/rpmbuild/SOURCES/* /tmp/qemu-6.2.0/ &&\
# cd /tmp &&\
# tar cJf /tmp/qemu-6.2.0.tar.xz qemu-6.2.0 &&\
# mv /tmp/qemu-6.2.0.tar.xz /root/rpmbuild/SOURCES/
# Build binary and source packages from the edited spec
RUN rpmbuild -ba --clean /root/rpmbuild/SPECS/qemu-kvm.spec

# Build the GNU telnetd server, since the usual distribution builds do not allow running bash as the login program.
# Only telnet and telnetd are enabled; the build tree stays in /opt/inetutils-2.4 (current WORKDIR is /opt).
RUN wget "https://ftp.gnu.org/gnu/inetutils/inetutils-2.4.tar.xz" &&\
tar xf inetutils-2.4.tar.xz &&\
cd inetutils-2.4 &&\
./configure --disable-servers --disable-clients --enable-telnetd --enable-telnet &&\
make -j$(nproc)

# =====================
# KVM base container
# =====================
FROM ${REGISTRY}/inginious/env-base:${VERSION}
anthonygego marked this conversation as resolved.
Show resolved Hide resolved
# Mark the image as a KVM environment.
# NOTE(review): presumably read by the agent to detect KVM-capable environments — confirm against the consumer of this label.
LABEL org.inginious.kvm 1

# Install QEMU with 9P virtfs enabled (RPMs built in the builder stage), then
# expose the qemu-kvm binary under /bin.
COPY --from=builder /root/rpmbuild/RPMS/x86_64/qemu-*.rpm /tmp/
RUN yum localinstall -y /tmp/*rpm &&\
rm -rf /tmp/*rpm &&\
ln -s /usr/libexec/qemu-kvm /bin/qemu-kvm

# Install GNU telnet utils (telnet client and telnetd server built in the builder stage)
COPY --from=builder /opt/inetutils-2.4/telnet/telnet /usr/sbin
COPY --from=builder /opt/inetutils-2.4/telnetd/telnetd /usr/sbin

# Install dependencies: expect (runs run.expect), xinetd (serves telnet), git (clones virtme below)
RUN dnf install -y expect xinetd git

# Make the xinetd config readable for the worker user in the SSH container
RUN chmod 644 /etc/xinetd.conf

# Install virtme: clone it, apply the local patch and expose virtme-run under /bin
WORKDIR /opt
COPY virtme.patch .
RUN git clone https://github.com/amluto/virtme &&\
git -C virtme apply < virtme.patch &&\
ln -s $(pwd)/virtme/virtme-run /bin/virtme-run

WORKDIR /

# Get the expect script launching the VM, together with the scripts it relies on.
# KVM_START_PATH is the environment variable _run_student_intern reads to find the start script.
COPY run.expect setup.sh telnet_login.sh /
ENV KVM_START_PATH=/run.expect

# Add the xinetd service definition for telnetd
COPY telnet /etc/xinetd.d/
Empty file added base-containers/kvm/README.md
Empty file.
11 changes: 11 additions & 0 deletions base-containers/kvm/qemu-kvm.spec.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--- qemu-kvm.spec 2023-05-22 13:26:12.087046202 +0000
+++ /root/rpmbuild/SPECS/qemu-kvm.spec 2023-05-22 12:40:09.838715272 +0000
@@ -652,6 +652,8 @@
Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
+# Enable 9P virtfs
+Patch258: virtio-9p-pci.patch

BuildRequires: wget
BuildRequires: rpm-build
11 changes: 11 additions & 0 deletions base-containers/kvm/run.expect
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/expect -f
#
# Boot the VM through /setup.sh, wait for the virtme console to come up, then
# type the commands that configure guest networking and start xinetd (telnet).

# Do not echo the output of the spawned process
log_user 0
# Wait at most 600 seconds for the boot message below
set timeout 600
spawn /setup.sh
# virtme-init prints this line once the serial console is available
expect "virtme-init: console is ttyS0\r"
# Give the guest interface a static address and bring it up.
# NOTE(review): 10.0.2.15 is presumably the guest address expected by the QEMU
# user-mode NIC added in virtme.patch — confirm.
send -- "ip a add 10.0.2.15/24 dev enp0s2\r"
send -- "ip l set dev enp0s2 up\r"
# Marker file: setup.sh maps guest /tmp/student to /task/student/kvm, where
# start_ssh_server waits for .telnet to appear before starting sshd.
send -- "touch /tmp/student/.telnet\r"
# Run xinetd in debug mode (-d) with line-buffered stdout
send -- "stdbuf -oL xinetd -d\r"
# Block until the spawned /setup.sh process terminates
wait
28 changes: 28 additions & 0 deletions base-containers/kvm/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#! /bin/bash -x
#
# Prepare the container for the KVM guest (kvm group, shared directory, kernel
# image, optional student_login script) and launch the guest with virtme-run
# as the "worker" user. Spawned by run.expect.

# Paths used by this script
STUDENT_DIR=/task/student
KVM_DIR="${STUDENT_DIR}/kvm"
SCRIPTS_DIR="${STUDENT_DIR}/scripts"
STUDENT_LOGIN="${SCRIPTS_DIR}/student_login"

# Recreate the "kvm" group with the GID that owns /dev/kvm, so that the group
# owning the device has a human-readable name (used by "su -G kvm" below)
groupdel kvm
groupadd -g $(stat -c '%g' /dev/kvm) kvm

# Create the read-write directory mounted in the KVM guest and hand it (and /task) to "worker"
mkdir "${KVM_DIR}"
chown worker:worker /task
chown worker:worker "${KVM_DIR}"

# Copy the kernel in a path readable by "worker" within the SSH container
cp "${SCRIPTS_DIR}/bzImage" /tmp

# Copy student_login file, if any, in a path readable by "worker" within the VM
# (telnet_login.sh looks for it at /student_login)
if [[ -f "${STUDENT_LOGIN}" ]]
then
cp "${STUDENT_LOGIN}" /
fi

# Launch the KVM as "worker", with "worker" and "kvm" as supplementary groups.
# --rwdir maps ${KVM_DIR} to /tmp/student inside the guest.
su - worker -G worker -G kvm -c "virtme-run --cpus 2 --memory 256 --kimg /tmp/bzImage --rwdir=/tmp/student=${KVM_DIR}"
10 changes: 10 additions & 0 deletions base-containers/kvm/telnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# xinetd service definition for the telnet server used to reach the KVM guest.
# telnetd is started as root for each connection, with /telnet_login.sh passed
# through -E as the program to run in place of the default login program.
service telnet
{
flags = REUSE IPv4
socket_type = stream
wait = no
user = root
server = /usr/sbin/telnetd
server_args = -E /telnet_login.sh
disable = no
}
21 changes: 21 additions & 0 deletions base-containers/kvm/telnet_login.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#! /bin/bash
#
# Login program executed by telnetd (see the xinetd "telnet" service, which
# passes this script through "-E") for every telnet session inside the KVM.
#
# On the first session, the task-provided setup script is run if there is
# one; every other session gets a plain shell.

# Marker file recording that a first login already happened
FIRST=/tmp/.first
# Optional task-provided script, copied to / by setup.sh
STUDENT_LOGIN="/student_login"

if [[ ! -f "${FIRST}" ]]
then
    # On first login within the KVM
    touch "${FIRST}"

    if [[ -f "${STUDENT_LOGIN}" ]]
    then
        # If the task specifies a given setup to launch (e.g. a mininet script), run it.
        # Run it through its absolute path: prefixing it with "./" would resolve
        # it against the current directory and only work when that is "/".
        "${STUDENT_LOGIN}"
    else
        # Else, simply spawn a shell in the KVM
        /bin/bash
    fi
else
    /bin/bash
fi
10 changes: 10 additions & 0 deletions base-containers/kvm/virtio-9p-pci.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
+++ a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak 2023-05-22 12:17:06.102529121 +0000
--- /dev/null
@@ -90,6 +90,7 @@
CONFIG_VHOST_USER_BLK=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_VGA=y
+CONFIG_VIRTIO_9P=y
CONFIG_VMMOUSE=y
CONFIG_VMPORT=y
CONFIG_VTD=y
25 changes: 25 additions & 0 deletions base-containers/kvm/virtme.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
diff --git a/virtme/architectures.py b/virtme/architectures.py
index ba16138..6bc5e42 100644
--- a/virtme/architectures.py
+++ b/virtme/architectures.py
@@ -71,7 +71,7 @@ class Arch_x86(Arch):
ret = Arch.qemuargs(is_native)

# Add a watchdog. This is useful for testing.
- ret.extend(['-watchdog', 'i6300esb'])
+ ret.extend(['-device', 'i6300esb', '-action', 'watchdog=pause'])

if is_native and os.access('/dev/kvm', os.R_OK):
# If we're likely to use KVM, request a full-featured CPU.
diff --git a/virtme/commands/run.py b/virtme/commands/run.py
index 8cecb07..9b082c0 100644
--- a/virtme/commands/run.py
+++ b/virtme/commands/run.py
@@ -367,6 +367,7 @@ def do_it() -> int:
# Set up / override baseline devices
qemuargs.extend(['-parallel', 'none'])
qemuargs.extend(['-net', 'none'])
+ qemuargs.extend(['-nic', 'user,hostfwd=tcp::2223-:23'])

if not args.graphics and not args.script_sh and not args.script_exec:
# It would be nice to use virtconsole, but it's terminally broken
3 changes: 2 additions & 1 deletion inginious-agent-docker
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ if __name__ == "__main__":
parser.add_argument("--disable-autorestart", help="Disables the auto restart on agent failure.", action="store_true")
parser.add_argument("--ssh", help="Allow this agent to handle tasks with ssh features", action="store_true",
default=False)
parser.add_argument("--kvm", help="Enable KVM passthrough on the Docker Agent.", action="store_true", default=False)
parser.add_argument("--runtime", nargs='+', action=RuntimeParser,
help="Add a runtime. Expects at least 2 arguments: the name of the runtime (eg runc), "
"the name of the environment type (eg docker or kata). You can then add flags:\n"
Expand Down Expand Up @@ -123,7 +124,7 @@ if __name__ == "__main__":
# Create agent
agent = DockerAgent(context, args.backend, args.friendly_name, args.concurrency, fsprovider,
address_host=args.debug_host, external_ports=args.debug_ports, tmp_dir=args.tmpdir,
runtimes=args.runtime, ssh_allowed=args.ssh)
runtimes=args.runtime, ssh_allowed=args.ssh, kvm_allowed=args.kvm)

# Run!
try:
Expand Down
14 changes: 11 additions & 3 deletions inginious/agent/docker_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class DockerRunningStudentContainer:

class DockerAgent(Agent):
def __init__(self, context, backend_addr, friendly_name, concurrency, tasks_fs: FileSystemProvider,
address_host=None, external_ports=None, tmp_dir="./agent_tmp", runtimes=None, ssh_allowed=False):
address_host=None, external_ports=None, tmp_dir="./agent_tmp", runtimes=None, ssh_allowed=False, kvm_allowed=False):
"""
:param context: ZeroMQ context for this process
:param backend_addr: address of the backend (for example, "tcp://127.0.0.1:2222")
Expand All @@ -78,6 +78,7 @@ def __init__(self, context, backend_addr, friendly_name, concurrency, tasks_fs:
:param type: type of the container ("docker" or "kata")
:param runtime: runtime used by docker (the defaults are "runc" with docker or "kata-runtime" with kata)
:param ssh_allowed: boolean to make this agent accept tasks with ssh or not
:param kvm_allowed: boolean to enable KVM passthrough on the agent
"""
super(DockerAgent, self).__init__(context, backend_addr, friendly_name, concurrency, tasks_fs)

Expand All @@ -103,6 +104,12 @@ def __init__(self, context, backend_addr, friendly_name, concurrency, tasks_fs:
# Does this agent allow ssh_student ?
self._ssh_allowed = ssh_allowed

# Does this agent allow KVM passthrough ?
self._kvm_allowed = kvm_allowed
if self._kvm_allowed and not os.path.exists('/dev/kvm'):
self._logger.warning("KVM passthrough seems to not be enabled on the Agent. KVM-specific tasks will be rejected.")
self._kvm_allowed = False

async def _init_clean(self):
""" Must be called when the agent is starting """
# Data about running containers
Expand Down Expand Up @@ -383,7 +390,7 @@ def __new_job_sync(self, message: BackendNewJob, future_results):
sockets_path, course_common_path,
course_common_student_path,
self.__get_fd_limit(), runtime,
ports)
ports, self._kvm_allowed)
except Exception as e:
self._logger.warning("Cannot create container! %s", str(e), exc_info=True)
shutil.rmtree(container_path)
Expand Down Expand Up @@ -493,7 +500,8 @@ async def create_student_container(self, parent_info, socket_id, environment_nam
parent_info.environment_type,
self.__get_fd_limit(),
parent_info.container_id if share_network else None,
ports)
ports,
self._kvm_allowed)
except Exception as e:
self._logger.exception("Cannot create student container!")
await self._write_to_container_stdin(write_stream, {"type": "run_student_retval", "retval": 254,
Expand Down
Loading
Loading