kvm + qemu + kvm ioctl

kvm_cpu_exec  --> kvm_vcpu_ioctl(cpu, KVM_RUN, 0)
/*
 * Thread entry point for one KVM vCPU.
 *
 * @arg: the CPUState served by this thread.
 *
 * The thread registers with RCU, takes the iothread lock, stores its own
 * thread handle and id in the CPUState, initialises the vCPU and its signal
 * handling, and announces that it has been created.  It then keeps
 * alternating between kvm_cpu_exec() (only while cpu_can_run() allows it)
 * and qemu_wait_io_event() until the CPU is flagged for unplug and can no
 * longer run.  Afterwards the vCPU is destroyed, the destruction is
 * signalled, the iothread lock is released and the RCU registration undone.
 *
 * Always returns NULL.
 */
static void *kvm_vcpu_thread_fn(void *arg)
{
    CPUState *vcpu = arg;
    int ret;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(vcpu->thread);
    vcpu->thread_id = qemu_get_thread_id();
    vcpu->can_do_io = 1;
    current_cpu = vcpu;

    /* Errors are reported through &error_fatal; ret is not examined here. */
    ret = kvm_init_vcpu(vcpu, &error_fatal);
    kvm_init_cpu_signals(vcpu);

    /* Tell the creator that this vCPU thread is up. */
    cpu_thread_signal_created(vcpu);
    qemu_guest_random_seed_thread_part2(vcpu->random_seed);

    for (;;) {
        if (cpu_can_run(vcpu)) {
            ret = kvm_cpu_exec(vcpu);
            if (ret == EXCP_DEBUG) {
                cpu_handle_guest_debug(vcpu);
            }
        }
        qemu_wait_io_event(vcpu);

        /* Leave only once unplug was requested and the CPU cannot run. */
        if (vcpu->unplug && !cpu_can_run(vcpu)) {
            break;
        }
    }

    kvm_destroy_vcpu(vcpu);
    cpu_thread_signal_destroyed(vcpu);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
void *kvm_cpu_thread(void *data) {
    struct kvm *kvm = (struct kvm *)data;
    int ret = 0;
    kvm_reset_vcpu(kvm->vcpus);

    while (1) {
        printf("KVM start run
");
        ret = ioctl(kvm->vcpus->vcpu_fd, KVM_RUN, 0);
    
        if (ret < 0) {
            fprintf(stderr, "KVM_RUN failed
");
            exit(1);
        }

        switch (kvm->vcpus->kvm_run->exit_reason) {
        case KVM_EXIT_UNKNOWN:
            printf("KVM_EXIT_UNKNOWN
");
            break;
        case KVM_EXIT_DEBUG:
            printf("KVM_EXIT_DEBUG
");
            break;
        case KVM_EXIT_IO:
            printf("KVM_EXIT_IO
");
            printf("out port: %d, data: %d
", 
                kvm->vcpus->kvm_run->io.port,  
                *(int *)((char *)(kvm->vcpus->kvm_run) + kvm->vcpus->kvm_run->io.data_offset)
                );
            sleep(1);
            break;
        case KVM_EXIT_MMIO:
            printf("KVM_EXIT_MMIO
");
            break;
        case KVM_EXIT_INTR:
            printf("KVM_EXIT_INTR
");
            break;
        case KVM_EXIT_SHUTDOWN:
            printf("KVM_EXIT_SHUTDOWN
");
            goto exit_kvm;
            break;
        default:
            printf("KVM PANIC
");
            goto exit_kvm;
        }
    }

exit_kvm:
    return 0;
}

/dev/kvm

[root@localhost cloud_images]# lsof /dev/kvm 
COMMAND     PID USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
qemu-syst 50066 root   14u   CHR 10,232      0t0 1103 /dev/kvm
[root@localhost cloud_images]# ps -elf | grep 50066
3 S root      50066      1 40  80   0 - 83896 poll_s 02:53 ?        00:00:07 qemu-system-aarch64 -name vm2 -daemonize -enable-kvm -M virt -cpu host -smp 2 -m 4096 -global virtio-blk-device.scsi=off -device virtio-scsi-device,id=scsi -kernel vmlinuz-4.18 --append console=ttyAMA0  root=UUID=6a09973e-e8fd-4a6d-a8c0-1deb9556f477 -initrd initramfs-4.18 -drive file=vhuser-test1.qcow2 -netdev user,id=unet,hostfwd=tcp:127.0.0.1:1122-:22 -device virtio-net-device,netdev=unet -vnc :10
0 S root      50093  48588  0  80   0 -  1729 pipe_w 02:54 pts/1    00:00:00 grep --color=auto 50066
[root@localhost cloud_images]# 

ioctl 系统调用在内核中的调用路径:sys_ioctl -> ksys_ioctl -> do_vfs_ioctl -> vfs_ioctl -> unlocked_ioctl -> kvm_vcpu_ioctl

kvm_vm_ioctl
accel/kvm/kvm-all.c:2623:int kvm_vm_ioctl(KVMState *s, int type, ...)
include/sysemu/kvm.h:276:int kvm_vm_ioctl(KVMState *s, int type, ...);

KVM_IOEVENTFD

QEMU 中建立 ioeventfd 的处理流程:

virtio_pci_config_write
 |-->virtio_ioport_write
      |-->virtio_pci_start_ioeventfd
          |-->virtio_pci_set_host_notifier_internal
              |-->virtio_queue_set_host_notifier_fd_handler
              |-->memory_region_add_eventfd
                  |-->memory_region_transaction_commit
                      |-->address_space_update_ioeventfds
                          |-->address_space_add_del_ioeventfds
                              |-->eventfd_add[kvm_mem_ioeventfd_add]
                                  |-->kvm_set_ioeventfd_mmio
                                      |-->kvm_vm_ioctl(...,KVM_IOEVENTFD,...)

KVM_IRQFD

/*
 * Attach an event notifier to, or detach it from, a guest interrupt via the
 * KVM_IRQFD VM ioctl.
 *
 * @s:        KVM state the ioctl is issued on
 * @event:    notifier whose fd is handed to KVM_IRQFD
 * @resample: optional resample notifier, may be NULL
 * @virq:     GSI number placed in the request
 * @assign:   true to assign, false to request KVM_IRQFD_FLAG_DEASSIGN
 *
 * Returns -ENOSYS when kvm_irqfds_enabled() is false, otherwise the result
 * of kvm_vm_ioctl().
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event,
                                    EventNotifier *resample, int virq,
                                    bool assign)
{
    int event_fd = event_notifier_get_fd(event);
    int resample_fd = -1;

    if (resample) {
        resample_fd = event_notifier_get_fd(resample);
    }

    /* Members not named here start out as zero, including flags. */
    struct kvm_irqfd irqfd = {
        .fd = event_fd,
        .gsi = virq,
    };

    if (!assign) {
        irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
    }

    if (resample_fd == -1) {
        /* No resample fd: on deassign, drop what a split irqchip recorded. */
        if (!assign && kvm_irqchip_is_split()) {
            kvm_resample_fd_remove(virq);
        }
    } else {
        /* A resample notifier is only accepted together with assign. */
        assert(assign);
        if (!kvm_irqchip_is_split()) {
            irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = resample_fd;
        } else {
            /*
             * With the slow irqchip (e.g. IOAPIC) living in userspace,
             * the in-kernel resamplefd cannot work: the EOI of the
             * interrupt is delivered to userspace, so the kernel never
             * kicks the resamplefd.  Userspace therefore imitates the
             * kernel behaviour by remembering the resamplefd here and
             * kicking it when the EOI for this IRQ arrives.
             *
             * This is a hack, since the IOAPIC is mostly bypassed
             * (apart from EOI broadcasts) once irqfd is in use.  It
             * does, however, recover a lot of performance for split
             * irqchip with INTx IRQs (for VFIO: 93% of the full fast
             * path, a 46% boost over the INTx slow path).
             */
            kvm_resample_fd_insert(virq, resample);
        }
    }

    return kvm_irqfds_enabled() ? kvm_vm_ioctl(s, KVM_IRQFD, &irqfd)
                                : -ENOSYS;
}
kvm_vm_ioctl(内核侧实现,virt/kvm/kvm_main.c)
/*
 * ioctl handler for a VM file descriptor.
 *
 * @filp:  file whose private_data holds the struct kvm being operated on
 * @ioctl: ioctl command number
 * @arg:   command argument; most commands treat it as a userspace pointer
 *
 * Commands handled here are dispatched to their generic helpers; any other
 * command is handed to kvm_arch_vm_ioctl().
 *
 * Returns the helper's result, -EFAULT when copying the argument from or to
 * userspace fails, and -EIO when the caller's mm is not the one recorded in
 * the VM.
 */
static long kvm_vm_ioctl(struct file *filp,
               unsigned int ioctl, unsigned long arg)
{
    struct kvm *kvm = filp->private_data;
    /* The same argument, viewed as a userspace pointer. */
    void __user *argp = (void __user *)arg;
    int r;

    /* Refuse callers whose address space differs from kvm->mm. */
    if (kvm->mm != current->mm)
        return -EIO;
    switch (ioctl) {
    case KVM_CREATE_VCPU:
        /* arg is passed through as a plain value, not as a pointer. */
        r = kvm_vm_ioctl_create_vcpu(kvm, arg);
        break;
    case KVM_ENABLE_CAP: {
        struct kvm_enable_cap cap;

        /*
         * Pattern used by most cases below: r is preset to -EFAULT so a
         * failed copy_from_user() can jump straight to out.
         */
        r = -EFAULT;
        if (copy_from_user(&cap, argp, sizeof(cap)))
            goto out;
        r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
        break;
    }
    case KVM_SET_USER_MEMORY_REGION: {
        struct kvm_userspace_memory_region kvm_userspace_mem;

        r = -EFAULT;
        if (copy_from_user(&kvm_userspace_mem, argp,
                        sizeof(kvm_userspace_mem)))
            goto out;

        r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
        break;
    }
    case KVM_GET_DIRTY_LOG: {
        struct kvm_dirty_log log;

        r = -EFAULT;
        if (copy_from_user(&log, argp, sizeof(log)))
            goto out;
        r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
        break;
    }
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
    case KVM_CLEAR_DIRTY_LOG: {
        struct kvm_clear_dirty_log log;

        r = -EFAULT;
        if (copy_from_user(&log, argp, sizeof(log)))
            goto out;
        r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
        break;
    }
#endif
#ifdef CONFIG_KVM_MMIO
    case KVM_REGISTER_COALESCED_MMIO: {
        struct kvm_coalesced_mmio_zone zone;

        r = -EFAULT;
        if (copy_from_user(&zone, argp, sizeof(zone)))
            goto out;
        r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
        break;
    }
    case KVM_UNREGISTER_COALESCED_MMIO: {
        struct kvm_coalesced_mmio_zone zone;

        r = -EFAULT;
        if (copy_from_user(&zone, argp, sizeof(zone)))
            goto out;
        r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
        break;
    }
#endif
    case KVM_IRQFD: {
        struct kvm_irqfd data;

        r = -EFAULT;
        if (copy_from_user(&data, argp, sizeof(data)))
            goto out;
        r = kvm_irqfd(kvm, &data);
        break;
    }
    case KVM_IOEVENTFD: {
        struct kvm_ioeventfd data;

        r = -EFAULT;
        if (copy_from_user(&data, argp, sizeof(data)))
            goto out;
        r = kvm_ioeventfd(kvm, &data);
        break;
    }
#ifdef CONFIG_HAVE_KVM_MSI
    case KVM_SIGNAL_MSI: {
        struct kvm_msi msi;

        r = -EFAULT;
        if (copy_from_user(&msi, argp, sizeof(msi)))
            goto out;
        r = kvm_send_userspace_msi(kvm, &msi);
        break;
    }
#endif
#ifdef __KVM_HAVE_IRQ_LINE
    /* Both commands share one path; the last argument tells them apart. */
    case KVM_IRQ_LINE_STATUS:
    case KVM_IRQ_LINE: {
        struct kvm_irq_level irq_event;

        r = -EFAULT;
        if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
            goto out;

        r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
                    ioctl == KVM_IRQ_LINE_STATUS);
        if (r)
            goto out;

        /* Only KVM_IRQ_LINE_STATUS copies irq_event back to userspace. */
        r = -EFAULT;
        if (ioctl == KVM_IRQ_LINE_STATUS) {
            if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
                goto out;
        }

        r = 0;
        break;
    }
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
    case KVM_SET_GSI_ROUTING: {
        struct kvm_irq_routing routing;
        struct kvm_irq_routing __user *urouting;
        /* Stays NULL when routing.nr is 0. */
        struct kvm_irq_routing_entry *entries = NULL;

        r = -EFAULT;
        if (copy_from_user(&routing, argp, sizeof(routing)))
            goto out;
        /* Each of the three checks below fails with -EINVAL. */
        r = -EINVAL;
        if (!kvm_arch_can_set_irq_routing(kvm))
            goto out;
        if (routing.nr > KVM_MAX_IRQ_ROUTES)
            goto out;
        if (routing.flags)
            goto out;
        if (routing.nr) {
            /* Duplicate the routing.nr entries from the user's struct. */
            urouting = argp;
            entries = vmemdup_user(urouting->entries,
                           array_size(sizeof(*entries),
                              routing.nr));
            if (IS_ERR(entries)) {
                r = PTR_ERR(entries);
                goto out;
            }
        }
        r = kvm_set_irq_routing(kvm, entries, routing.nr,
                    routing.flags);
        /* The local copy is released whatever kvm_set_irq_routing() returned. */
        kvfree(entries);
        break;
    }
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
    case KVM_CREATE_DEVICE: {
        struct kvm_create_device cd;

        r = -EFAULT;
        if (copy_from_user(&cd, argp, sizeof(cd)))
            goto out;

        r = kvm_ioctl_create_device(kvm, &cd);
        if (r)
            goto out;

        /* On success cd is copied back to userspace. */
        r = -EFAULT;
        if (copy_to_user(argp, &cd, sizeof(cd)))
            goto out;

        r = 0;
        break;
    }
    case KVM_CHECK_EXTENSION:
        r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
        break;
    default:
        /* Everything not handled above goes to kvm_arch_vm_ioctl(). */
        r = kvm_arch_vm_ioctl(filp, ioctl, arg);
    }
out:
    return r;
}
原文地址:https://www.cnblogs.com/dream397/p/13935372.html