virtiofsd

Introduction to virtio-fs

    • A scheme for sharing a file system among guests

    • virtio-fs mmap()s the shared files into the QEMU process address space and lets different guests access that memory directly via DAX

    • Both the DAX data path and the shared-memory metadata path avoid unnecessary VM/hypervisor communication (as long as the metadata has not changed)

      1. Kata Containers utilizes the Linux kernel DAX (Direct Access filesystem)
        feature to efficiently map some host-side files into the guest VM space.

Reference: https://www.cnblogs.com/yi-mu-xi/p/12923523.html
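Inside the guest, the exported directory is mounted by tag rather than by a device node. A minimal sketch of the mount(2) call (the tag myfs and the mount point /mnt are assumptions, matching the QEMU command line shown later):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
    /* The source argument is the virtio-fs tag, not a block device;
     * this is equivalent to: mount -t virtiofs myfs /mnt */
    if (mount("myfs", "/mnt", "virtiofs", 0, NULL) == -1) {
        perror("mount virtiofs");
        return 1;
    }
    return 0;
}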

Existing FUSE mounts on a host:

root@cloud:~# mount | grep fuse
fusectl on /sys/fs/fuse/connections type fusectl (rw,relatime)
lxcfs on /var/lib/lxcfs type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other)
root@cloud:~# 
The virtio device models in the QEMU source tree (hw/virtio):

[root@bogon virtio]# ls
Kconfig          vhost.c               vhost-user.c           vhost-vsock.c      virtio-balloon-pci.c  virtio-crypto.c          virtio-mmio.c     virtio-rng.c
Makefile.objs    vhost-scsi-pci.c      vhost-user-fs.c        vhost-vsock-pci.c  virtio-blk-pci.c      virtio-crypto-pci.c      virtio-net-pci.c  virtio-rng-pci.c
trace-events     vhost-stub.c          vhost-user-fs-pci.c    virtio-9p-pci.c    virtio-bus.c          virtio-input-host-pci.c  virtio-pci.c      virtio-scsi-pci.c
vhost-backend.c  vhost-user-blk-pci.c  vhost-user-scsi-pci.c  virtio-balloon.c   virtio.c              virtio-input-pci.c       virtio-pci.h      virtio-serial-pci.c
[root@bogon virtio]# 

Principle and Architecture

The virtio-fs scheme uses the FUSE protocol to communicate between host and guest. A FUSE server implemented on the host operates on the host's files, while the guest kernel acts as the FUSE client and mounts the filesystem inside the guest; between server and client, virtio serves as the transport layer carrying the FUSE protocol, instead of the traditional /dev/fuse device. To let different guests mmap(MAP_SHARED) the same file at the same time, virtio-fs maps the file into the QEMU process address space and lets the guests access that memory via DAX. This bypasses the guest page cache, so every guest sees the same data, and because that memory is shared among the guests it also saves memory.
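The transport swap is invisible to the FUSE protocol itself: requests keep their standard framing and are simply placed in virtqueue buffers instead of being read from /dev/fuse. For orientation, every FUSE request begins with the fixed header from <linux/fuse.h>, reproduced here for illustration:

#include <stdint.h>

struct fuse_in_header {
    uint32_t len;     /* total length of the request, header included */
    uint32_t opcode;  /* FUSE_LOOKUP, FUSE_READ, FUSE_WRITE, ... */
    uint64_t unique;  /* request id, echoed back in the reply header */
    uint64_t nodeid;  /* inode number the operation applies to */
    uint32_t uid;     /* credentials of the requesting process */
    uint32_t gid;
    uint32_t pid;
    uint32_t padding;
};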

Simplified architecture diagram: 1.png

From the diagram we can see that virtio-fs consists of the following components:

  • the guest kernel, acting as the FUSE client, mounts the directory exported by the host
  • the vhost-user-fs-pci device newly added to QEMU, which establishes the vhost-user connection between the guest kernel and virtiofsd
  • virtiofsd (also in the QEMU tree): a libfuse-based FUSE daemon running on the host that serves FUSE requests to the guest
qemu-system-x86_64 -chardev socket,id=char0,path=/tmp/vhost-fs.socket -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=2G
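Two host-side details are implied by this command line (described here as the usual setup, not taken from the original post): vhost-user requires the guest RAM to be shared with the daemon, so QEMU is normally also given a share=on memory backend (e.g. -object memory-backend-file,...,share=on together with -numa node,memdev=...), and virtiofsd must already be listening on the socket before QEMU connects, typically started as virtiofsd --socket-path=/tmp/vhost-fs.socket -o source=/path/to/shared/dir.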
vhost-user-fs-pci
hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
 
 88 static const VirtioPCIDeviceTypeInfo vhost_user_fs_pci_info = {
 89     .base_name             = TYPE_VHOST_USER_FS_PCI,
 90     .non_transitional_name = "vhost-user-fs-pci",
 91     .instance_size = sizeof(VHostUserFSPCI),
 92     .instance_init = vhost_user_fs_pci_instance_init,
 93     .class_init    = vhost_user_fs_pci_class_init,
 94 };
vhost_user_fs_pci_register
 96 static void vhost_user_fs_pci_register(void)
 97 {
 98     virtio_pci_types_register(&vhost_user_fs_pci_info);
 99 }
100 
101 type_init(vhost_user_fs_pci_register);
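type_init() arranges for vhost_user_fs_pci_register() to run during QEMU startup, and virtio_pci_types_register() expands the single VirtioPCIDeviceTypeInfo into the abstract vhost-user-fs-pci-base type plus the user-visible variant; since only .non_transitional_name is set here, the only instantiable device is vhost-user-fs-pci.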
hw/virtio/vhost-user-fs-pci.c:15:#include "hw/virtio/vhost-user-fs.h"
hw/virtio/vhost-user-fs-pci.c:27:#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
hw/virtio/vhost-user-fs-pci.c:90:    .non_transitional_name = "vhost-user-fs-pci",
hw/virtio/vhost-user-fs.c:21:#include "hw/virtio/vhost-user-fs.h"
hw/virtio/vhost-user-fs.c:367:    virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
hw/virtio/vhost-user.c:15:#include "hw/virtio/vhost-user-fs.h"
vuf_device_realize
303 static void vuf_device_realize(DeviceState *dev, Error **errp)
304 {
    ...
367     virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
368                 sizeof(struct virtio_fs_config));
    ...
}
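virtio_init() above sizes the device config space with sizeof(struct virtio_fs_config). Per the virtio-fs specification this layout has just two fields, sketched here with host C types (on the wire num_request_queues is little-endian):

#include <stdint.h>

struct virtio_fs_config {
    char tag[36];                /* mount tag the guest mounts by, e.g. "myfs" */
    uint32_t num_request_queues; /* number of request virtqueues */
};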
427 static void vuf_class_init(ObjectClass *klass, void *data)
428 {
429     DeviceClass *dc = DEVICE_CLASS(klass);
430     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
431 
432     dc->props = vuf_properties;
433     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
434     vdc->realize = vuf_device_realize;
435     vdc->unrealize = vuf_device_unrealize;
436     vdc->get_features = vuf_get_features;
437     vdc->get_config = vuf_get_config;
438     vdc->set_status = vuf_set_status;
439     vdc->guest_notifier_mask = vuf_guest_notifier_mask;
440     vdc->guest_notifier_pending = vuf_guest_notifier_pending;
441 }
Call chain: vuf_device_realize() → vhost_user_init() (associates the device with the vhost-user chardev)
2190 int main(int argc, char *argv[])
2191 {
     ...
2276     se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);  /* create the FUSE session with the passthrough_ll ops */
2277     if (se == NULL)
2278         goto err_out1;
2279 
2280     if (fuse_set_signal_handlers(se) != 0)
2281         goto err_out2;
2282 
2283     if (fuse_session_mount(se) != 0)  /* in virtiofsd this sets up the vhost-user socket, see below */
2284         goto err_out3;
     ...
}
2645 int fuse_session_mount(struct fuse_session *se)
2646 {
2647         return virtio_session_mount(se);
2648 }
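Note that in virtiofsd fuse_session_mount() no longer touches /dev/fuse at all; it simply forwards to virtio_session_mount(), which creates the vhost-user listening socket and blocks until QEMU connects.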
721 
722 int virtio_session_mount(struct fuse_session *se)
723 {
724         struct sockaddr_un un;
725 
726         if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
727                 fprintf(stderr, "Socket path too long\n");
728                 return -1;
729         }
730 
731         /* Poison the fuse FD so we spot if we accidentally use it;
732          * DO NOT check for this value, check for se->vu_socket_path
733          */
734         se->fd = 0xdaff0d11;
735 
736         /* Create the Unix socket to communicate with qemu
737          * based on QEMU's vhost-user-bridge
738          */
739         unlink(se->vu_socket_path);
740         strcpy(un.sun_path, se->vu_socket_path);
741         size_t addr_len = sizeof(un.sun_family) + strlen(se->vu_socket_path);
742 
743         int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
744         if (listen_sock == -1) {
745                perror("vhost socket creation");
746                return -1;
747         }
748         un.sun_family = AF_UNIX;
749 
750         if (bind(listen_sock, (struct sockaddr *) &un, addr_len) == -1) {
751                 perror("vhost socket bind");
752                 return -1;
753         }
754 
755         if (listen(listen_sock, 1) == -1) {
756                 perror("vhost socket listen");
757                 return -1;
758         }
759 
760         fprintf(stderr, "%s: Waiting for vhost-user socket connection...\n", __func__);
761         int data_sock = accept(listen_sock, NULL, NULL);
762         if (data_sock == -1) {
763                 perror("vhost socket accept");
764                 close(listen_sock);
765                 return -1;
766         }
767         close(listen_sock);
768         fprintf(stderr, "%s: Received vhost-user socket connection\n", __func__);
769         se->vu_socketfd = data_sock;

770 
771         /* TODO: Some cleanup/deallocation! */
772         se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
773         se->virtio_dev->se = se;
774         vu_init(&se->virtio_dev->dev, se->vu_socketfd,
775                 fv_panic,
776                 fv_set_watch, fv_remove_watch,
777                 &fv_iface);
778 
779         return 0;
780 }
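Once virtio_session_mount() returns, the daemon's session loop services requests arriving on the virtqueues through the vhost-user connection (via the fv_iface callbacks registered with vu_init()) instead of read()ing /dev/fuse; the se->fd poison value above exists precisely to catch any leftover code path that still assumes a /dev/fuse file descriptor.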
Original article: https://www.cnblogs.com/dream397/p/13867752.html