cgroup kata container

https://github.com/kata-containers/documentation/blob/master/design/host-cgroups.md

https://zhuanlan.zhihu.com/p/105230155

https://blog.csdn.net/yuchunyu97/article/details/109241723

https://github.com/kata-containers/runtime/issues/2090

root@ubuntu:/opt/gopath/src/github.com/kata-containers# kata-runtime kata-env | grep SandboxCgroup
SandboxCgroupOnly = false
root@ubuntu:/opt/gopath/src/github.com/kata-containers#

// constrainHypervisor places the hypervisor (VMM) process and its vCPU
// threads into cgroups.
//
// If s.config.SandboxCgroupOnly is set, nothing is done and nil is returned:
// the Kata components were already moved into the sandbox cgroup, so the VMM
// lands there as well.
//
// Otherwise the work is split in two:
//   - every hypervisor PID is added to a separate "no constraints" cgroup
//     derived from s.state.CgroupPath;
//   - every vCPU thread reported by the hypervisor is added, as a task, to
//     the cgroup passed in by the caller.
//
// An error is returned when the hypervisor reports no usable first PID, when
// the no-constraints cgroup cannot be created, when the thread ids cannot be
// queried, or when adding a PID or thread to its cgroup fails. A hypervisor
// that reports no vCPU threads is not an error.
func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
	// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
	// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
	// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
	// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
	// cgroup
	if s.config.SandboxCgroupOnly {
		// Kata components were moved into the sandbox-cgroup already, so VMM
		// will already land there as well. No need to take action
		return nil
	}

	// The first PID must be valid; without it there is nothing to move.
	pids := s.hypervisor.getPids()
	if len(pids) == 0 || pids[0] == 0 {
		return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
	}

	// Move the VMM into cgroups without constraints, those cgroups are not yet supported.
	// The resources struct is deliberately left empty so no limits are requested.
	resources := &specs.LinuxResources{}
	path := cgroupNoConstraintsPath(s.state.CgroupPath)
	vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
	if err != nil {
		return fmt.Errorf("Could not create cgroup %v: %v", path, err)
	}

	for _, pid := range pids {
		// Any further invalid PID is only logged and skipped, it does not
		// abort the whole operation.
		if pid <= 0 {
			s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
			continue
		}

		if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
			return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
		}
	}

	// when new container joins, new CPU could be hotplugged, so we
	// have to query fresh vcpu info from hypervisor every time.
	tids, err := s.hypervisor.getThreadIDs()
	if err != nil {
		return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
	}
	if len(tids.vcpus) == 0 {
		// If there's no tid returned from the hypervisor, this is not
		// a bug. It simply means there is nothing to constrain, hence
		// let's return without any error from here.
		return nil
	}

	// Move vcpus (threads) into cgroups with constraints.
	// Move whole hypervisor process would be easier but the IO/network performance
	// would be over-constrained.
	for _, tid := range tids.vcpus {
		// In contrast, AddTask will write thread id to `tasks`
		// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
		// qemu will be left in parent cgroup untouched.
		if err := cgroup.AddTask(cgroups.Process{Pid: tid}); err != nil {
			// Say which thread failed, like every other error path in this
			// function does, instead of returning the bare cgroup error.
			return fmt.Errorf("failed to add vCPU thread %d to cgroup: %v", tid, err)
		}
	}

	return nil
}