Ironic bare-metal rescue process

1、用户调用Nova的rescue函数

nova/virt/ironic/driver.py
# Excerpt of Nova's Ironic virt driver (nova/virt/ironic/driver.py).
# Lines consisting of "......" mark code that was elided from the excerpt.
class IronicDriver(virt_driver.ComputeDriver):
        ......
        ......
    # Lazily import the ironicclient module and create the client wrapper.
    def __init__(self, virtapi, read_only=False):
        super(IronicDriver, self).__init__(virtapi)
        global ironic
        # Import only once; the module-level "ironic" global caches the module.
        if ironic is None:
            ironic = importutils.import_module('ironicclient')
        ......
        self.ironicclient = client_wrapper.IronicClientWrapper()
     
    # NOTE(review): this excerpt shows spawn(), which requests the ACTIVE
    # provision state (a deploy); the driver's rescue entry point is not
    # shown here -- presumably it follows the same pattern, TODO confirm.
    def spawn(self, context, instance, image_meta, injected_files,
          admin_password, allocations, network_info=None,
          block_device_info=None):
        ......
        # Call ironicclient.call() to trigger deployment of the node.
        try:
             self.ironicclient.call("node.set_provision_state", node_uuid,
                                    ironic_states.ACTIVE,
                                    configdrive=configdrive_value)
        ......
        try:
            # The virt driver loops while waiting for provision_state to
            # change, updating the Nova state as needed.
            timer.start(interval=CONF.ironic.api_retry_interval).wait()
            LOG.info('Successfully provisioned Ironic node %s',
             node.uuid, instance=instance)
View Code
ironic/api/controllers/v1/node.py
# The Ironic API receives the set_provision_state call and issues the
# do_node_rescue RPC call.
class NodeStatesController(rest.RestController):
    def provision(self, node_ident, target, configdrive=None,
              clean_steps=None, rescue_password=None):
        .....
        # Rescue branch: taken when the requested target is the 'rescue' verb.
        elif (target == ir_states.VERBS['rescue']):
            # Reject a missing or whitespace-only password with HTTP 400.
            if not (rescue_password and rescue_password.strip()):
                msg = (_('A non-empty "rescue_password" is required when '
                    'setting target provision state to %s') %
                ir_states.VERBS['rescue'])
                raise wsme.exc.ClientSideError(
                    msg, status_code=http_client.BAD_REQUEST)
            # Hand the request over to the conductor via RPC.
            pecan.request.rpcapi.do_node_rescue(
                pecan.request.context, rpc_node.uuid, rescue_password, topic)
View Code
ironic/conductor/manager.py
# Excerpt of the Ironic conductor manager (ironic/conductor/manager.py).
# "......" marks elided code; the except clauses of the try blocks and the
# original indentation were lost in this excerpt.
class ConductorManager(base_manager.BaseConductorManager):
    ......
    def do_node_rescue(self, context, node_id, rescue_password):
        ......
        # Save the node's rescue password.
        instance_info = node.instance_info
        instance_info['rescue_password'] = rescue_password
        node.instance_info = instance_info
        node.save()# The Ironic conductor has stored the rescue password in instance_info and will notify the corresponding driver
  
        # Validate the power, rescue and network interfaces before starting.
        try:
        task.driver.power.validate(task)
        task.driver.rescue.validate(task)
        task.driver.network.validate(task)
         
        # Fire the 'rescue' event; _do_node_rescue is passed as the call
        # argument for the _spawn_worker callback.
        try:
        task.process_event(
        'rescue',
        callback=self._spawn_worker,
        call_args=(self._do_node_rescue, task),# internal RPC method that rescues an existing node deployment
        err_handler=utils.spawn_rescue_error_handler)
         
    def _do_node_rescue(self, task):
        ......
        try:
            # The rescue interface returns the state the node should move to.
            next_state = task.driver.rescue.rescue(task)
  
        # RESCUEWAIT -> fire 'wait'; RESCUE -> fire 'done'.
        if next_state == states.RESCUEWAIT:
        task.process_event('wait')
        elif next_state == states.RESCUE:
                task.process_event('done')
View Code
ironic/drivers/modules/agent.py
# Excerpt of the agent rescue interface (ironic/drivers/modules/agent.py).
class AgentRescue(base.RescueInterface):
    .....
    # Boot a rescue ramdisk on the node.
    def rescue(self, task):
        # Power the node off first.
        manager_utils.node_power_action(task, states.POWER_OFF)
        # Clean up the instance.
        task.driver.boot.clean_up_instance(task)
        # Detach the node from its tenant networks.
        task.driver.network.unconfigure_tenant_networks(task)
        # Create a neutron port for each port so the rescue ramdisk can boot.
        task.driver.network.add_rescuing_network(task)
        if CONF.agent.manage_agent_boot:
            ramdisk_opts = deploy_utils.build_agent_options(task.node)
            # Prepare booting of the Ironic ramdisk via PXE.
            task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
        # Power the node back on (POWER_ON).
        manager_utils.node_power_action(task, states.POWER_ON)
         
        # Always reports RESCUEWAIT as the next state.
        return states.RESCUEWAIT
View Code
ironic/drivers/modules/pxe.py
    # Excerpt of the PXE boot interface (ironic/drivers/modules/pxe.py).
    class PXEBoot(base.BootInterface):
        ......
        def prepare_ramdisk(self, task, ramdisk_params):
            node = task.node
            # mode is later used as the prefix of the '<mode>_kernel' /
            # '<mode>_ramdisk' labels.
            mode = deploy_utils.rescue_or_deploy_mode(node)
             
            if CONF.pxe.ipxe_enabled:
                # Render the iPXE boot script into the HTTP root directory.
                pxe_utils.create_ipxe_boot_script()
            dhcp_opts = pxe_utils.dhcp_options_for_instance(task)# retrieve the DHCP PXE boot options
            provider = dhcp_factory.DHCPFactory()
            provider.update_dhcp(task, dhcp_opts)# send or update the DHCP BOOT options for this node
            pxe_info = _get_image_info(node, mode=mode)# generate the TFTP file paths for the rescue image
  
            # NOTE(review): "persistent" is not defined in this excerpt;
            # presumably it is set in code that was elided -- TODO confirm.
            manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                   persistent=persistent)
 
            # With iPXE + Swift, drop the kernel/ramdisk entries so they are
            # not passed to _cache_ramdisk_kernel below.
            if CONF.pxe.ipxe_enabled and CONF.pxe.ipxe_use_swift:
                kernel_label = '%s_kernel' % mode
                ramdisk_label = '%s_ramdisk' % mode
                pxe_info.pop(kernel_label, None)
                pxe_info.pop(ramdisk_label, None)
 
            # Cache whatever images remain in pxe_info.
            if pxe_info:
                _cache_ramdisk_kernel(task.context, node, pxe_info)
            
View Code

IPA 与 ironic-conductor 交互：Agent ramdisk 启动后，先回调 /v1/lookup 获取节点信息，再发送心跳。

ironic/drivers/modules/agent_base_vendor.py
# Excerpt of the agent heartbeat handling
# (ironic/drivers/modules/agent_base_vendor.py); "......" marks elided code.
class HeartbeatMixin(object):
    ......
    def heartbeat(self, task, callback_url, agent_version):
        ......
        try:
            .....
            # A heartbeat from a node in RESCUEWAIT triggers rescue
            # finalization.
            elif (node.provision_state == states.RESCUEWAIT):
                # NOTE(review): msg is set before the call; presumably it is
                # used by an elided except handler -- TODO confirm.
                msg = _('Node failed to perform rescue operation.')
                self._finalize_rescue(task)
  
    def _finalize_rescue(self, task):
        node = task.node
        try:
            # Ask the agent (through the agent client) to finalize rescue.
            result = self._client.finalize_rescue(node)
View Code
ironic/drivers/modules/agent_client.py
# Excerpt of the agent REST client (ironic/drivers/modules/agent_client.py).
class AgentClient(object):
    # Instruct the ramdisk to finish entering rescue mode.
    def finalize_rescue(self, node):
        # Invoke finalize_rescue based on the config drive and rescue password
        # (RESCUEWAIT -> RESCUING), passing rescue_password to IPA.
        rescue_pass = node.instance_info.get('rescue_password')
        params = {'rescue_password': rescue_pass}
        return self._command(node=node,
                     method='rescue.finalize_rescue',
                     params=params)
  
    def _command(self, node, method, params, wait=False):
        # Send a command to IPA.
        url = self._get_command_url(node)
        body = self._get_command_body(method, params)
        # "wait" is sent as the lowercase string form of the boolean.
        request_params = {
            'wait': str(wait).lower()
        # NOTE(review): the closing "}" of request_params is missing in this
        # excerpt.
        try:
            response = self.session.post(url, params=request_params, data=body)
View Code
ironic_python_agent/extensions/rescue.py
PASSWORD_FILE = '/etc/ipa-rescue-config/ipa-rescue-password'
# Excerpt of the IPA rescue extension
# (ironic_python_agent/extensions/rescue.py).
class RescueExtension(base.BaseAgentExtension):
        # Store the rescue password, then stop serving the agent API.
        def finalize_rescue(self, rescue_password=""):
            self.write_rescue_password(rescue_password)
            self.agent.serve_api = False # turn off the API endpoint
            return
  
  
        # Hash the password with a salt and write it to PASSWORD_FILE.
        def write_rescue_password(self, rescue_password=""):
                LOG.debug('Writing hashed rescue password to %s', PASSWORD_FILE)
                salt = self.make_salt()
                hashed_password = crypt.crypt(rescue_password, salt)
                try:
                    with open(PASSWORD_FILE, 'w') as f:
                    f.write(hashed_password)# write the hashed rescue password to /etc/ipa-rescue-config/ipa-rescue-password
View Code
ironic/drivers/modules/agent_base_vendor.py
# Second excerpt of HeartbeatMixin
# (ironic/drivers/modules/agent_base_vendor.py); the except clause of the try
# block was elided.
class HeartbeatMixin(object):
    # Call the ramdisk to prepare rescue mode and verify the result.
    def _finalize_rescue(self, task):
        node = task.node
        try:
            result = self._client.finalize_rescue(node)
        task.process_event('resume')# resume the node's state
        task.driver.rescue.clean_up(task)# clean up this node's deployment environment
        task.driver.network.configure_tenant_networks(task)# switch networking back to the previous tenant networks
        task.process_event('done')# move the task to the 'done' state
View Code
原文地址:https://www.cnblogs.com/gushiren/p/9512846.html