Tuesday, January 28, 2020
My name is Christophe de Dinechin. I have been working at Red Hat for just over three years now, and I think it's about time for me to set up a blog specifically for the stuff I work on at Red Hat which I believe is worth sharing or keeping for future reference.
This blog is written using Blogmax, a very simple blogging package for Emacs.
There is a tips section and a bugs section on this blog, but they are just copy-pasted from stuff I did a few years ago, so for now they are highly obsolete.
The name of this blog is Red Skin Cat, which translates into French as Chat Peau Rouge, which is pronounced exactly like Chapeau Rouge, the French translation of Red Hat.
There will be older entries as I put in this blog some stuff that I found useful in recent weeks.
Kata containers security
There is a bit of concern regarding the security of Kata Containers. To make a long story short, the issue is that Kata Containers calls qemu directly, in essence being responsible for security, but it does not fully "secure" the resulting qemu process the way libvirt does on RHEL or Fedora.
So the debate, at least for me, is whether it's reasonable to have qemu run in a somewhat insecure manner when you run it as root, i.e. without activating the kind of security you get from, say, virsh start.
Currently, Kata Containers will run qemu with a command line that looks like this:
/usr/bin/qemu-lite-system-x86_64 -name sandbox-23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d -uuid e7573de4-f4af-4a82-9b69-dbb72e24cb0a -machine pc,accel=kvm,kernel_irqchip,nvdimm -cpu host,pmu=off -qmp unix:/run/vc/vm/23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d/qmp.sock,server,nowait -m 2048M,slots=10,maxmem=8906M -device pci-bridge,bus=pci.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= -device virtio-serial-pci,disable-modern=true,id=serial0,romfile= -device virtconsole,chardev=charconsole0,id=console0 -chardev socket,id=charconsole0,path=/run/vc/vm/23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d/console.sock,server,nowait -device nvdimm,id=nv0,memdev=mem0 -object memory-backend-file,id=mem0,mem-path=/usr/share/kata-containers/kata-containers-imageclearlinux_1.8.0-alpha2_agente40d7749dc.img,size=134217728 -device virtio-scsi-pci,id=scsi0,disable-modern=true,romfile= -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng,rng=rng0,romfile= -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 -chardev socket,id=charch0,path=/run/vc/vm/23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d/kata.sock,server,nowait -device virtio-9p-pci,disable-modern=true,fsdev=extra-9p-kataShared,mount_tag=kataShared,romfile= -fsdev local,id=extra-9p-kataShared,path=/run/kata-containers/shared/sandboxes/23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d,security_model=none -netdev tap,id=network-0,vhost=on,vhostfds=3,fds=4 -device driver=virtio-net-pci,netdev=network-0,mac=2e:b0:15:b4:d0:6f,disable-modern=true,mq=on,vectors=4,romfile= -global kvm-pit.losttickpolicy=discard -vga none -no-user-config -nodefaults -nographic -daemonize -kernel /usr/share/kata-containers/vmlinuz-4.19.28.42-48.1.container -append tsc=reliable notimercheck rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k console=hvc0 console=hvc1 
iommu=off cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 quiet systemd.show_status=false panic=1 nr_cpus=1 agent.use_vsock=false init=/usr/lib/systemd/systemd systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket -pidfile /run/vc/vm/23223acca4d1945d21cae8cdbd9059a8adb5e9b3a766e01ce7e8c194dcf0049d/pid -smp 1,cores=1,threads=1,sockets=1,maxcpus=1
From a command-line point of view, it is not completely different from the way libvirt runs it:
/usr/bin/qemu-system-x86_64 -name guest=f32-turbo,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-f32-turbo/master-key.aes -machine pc-q35-3.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off -cpu Broadwell-IBRS,vme=on,ss=on,vmx=on,f16c=on,rdrand=on,hypervisor=on,arat=on,tsc-adjust=on,umip=on,md-clear=on,stibp=on,arch-capabilities=on,ssbd=on,xsaveopt=on,pdpe1gb=on,abm=on,ibpb=on,amd-ssbd=on,skip-l1dfl-vmentry=on -m 8192 -overcommit mem-lock=off -smp 12,sockets=12,cores=1,threads=1 -uuid f5af93eb-115f-4e77-87f8-eb7a7a533587 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=34,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.losttickpolicy=delay -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -device pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-root-port,port=0x12,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=0x13,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x3 -device pcie-root-port,port=0x14,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x4 -device pcie-root-port,port=0x15,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x5 -device pcie-root-port,port=0x16,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x6 -device qemu-xhci,p2=15,p3=15,id=usb,bus=pci.2,addr=0x0 -device virtio-serial-pci,id=virtio-serial0,bus=pci.3,addr=0x0 -blockdev {"driver":"file","filename":"/var/lib/libvirt/images/f30-turbo.qcow2","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-2-format","read-only":false,"driver":"qcow2","file":"libvirt-2-storage","backing":null} -device virtio-blk-pci,scsi=off,bus=pci.4,addr=0x0,drive=libvirt-2-format,id=virtio-disk0,bootindex=1 -device ide-cd,bus=ide.0,id=sata0-0-0 -netdev tap,fd=36,id=hostnet0,vhost=on,vhostfd=37 
-device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:a5:5b:bc,bus=pci.1,addr=0x0 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -chardev socket,id=charchannel0,fd=38,server,nowait -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 -chardev spicevmc,id=charchannel1,name=vdagent -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=com.redhat.spice.0 -device usb-tablet,id=input0,bus=usb.0,port=1 -spice port=5900,addr=127.0.0.1,disable-ticketing,image-compression=off,seamless-migration=on -device qxl-vga,id=video0,ramsize=67108864,vram_size=67108864,vram64_size_mb=0,vgamem_mb=16,maxoutputs=1,bus=pcie.0,addr=0x1 -device ich9-intel-hda,id=sound0,bus=pcie.0,addr=0x1b -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev spicevmc,id=charredir0,name=usbredir -device usb-redir,chardev=charredir0,id=redir0,bus=usb.0,port=2 -chardev spicevmc,id=charredir1,name=usbredir -device usb-redir,chardev=charredir1,id=redir1,bus=usb.0,port=3 -device virtio-balloon-pci,id=balloon0,bus=pci.5,addr=0x0 -object rng-random,id=objrng0,filename=/dev/urandom -device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.6,addr=0x0 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
There are a number of interesting differences, e.g. the large -append list. But the more important things as far as security is concerned happen before qemu is even launched. Libvirt has created a sandbox, and as far as I know, Kata Containers does not yet build an equivalent sandbox.
So the next question is: how should we build that sandbox?
Engineering
Engineering is about having fun.