zookeeper心跳机制流程梳理

zookeeper心跳机制流程梳理

Processor链Chain

protected void setupRequestProcessors() {
    RequestProcessor finalProcessor = new FinalRequestProcessor(this);
    RequestProcessor toBeAppliedProcessor = new Leader.ToBeAppliedRequestProcessor(
            finalProcessor, getLeader().toBeApplied);
    commitProcessor = new CommitProcessor(toBeAppliedProcessor,
            Long.toString(getServerId()), false);
    commitProcessor.start();
    ProposalRequestProcessor proposalProcessor = new ProposalRequestProcessor(this,
            commitProcessor);
    proposalProcessor.initialize();
    firstProcessor = new PrepRequestProcessor(this, proposalProcessor);
    ((PrepRequestProcessor)firstProcessor).start();
}

zookeeper 客户端发送心跳,主要在

@Override
public void run() {
    clientCnxnSocket.introduce(this,sessionId);
    clientCnxnSocket.updateNow();
    //最近一次心跳,都设置为当前值
    clientCnxnSocket.updateLastSendAndHeard();
    int to;
    long lastPingRwServer = System.currentTimeMillis();
    final int MAX_SEND_PING_INTERVAL = 10000; //10 seconds
    while (state.isAlive()) {
        try {
        	//不为连接状态则连接
            if (!clientCnxnSocket.isConnected()) {
                if(!isFirstConnect){
                    try {
                        Thread.sleep(r.nextInt(1000));
                    } catch (InterruptedException e) {
                        LOG.warn("Unexpected exception", e);
                    }
                }
                // don't re-establish connection if we are closing
                if (closing || !state.isAlive()) {
                    break;
                }
                //注册OP_CONNECT事件      sockKey = sock.register(selector, SelectionKey.OP_CONNECT);
                startConnect();
                //最近一次发送接收时间
                clientCnxnSocket.updateLastSendAndHeard();
            }

            if (state.isConnected()) {
                // determine whether we need to send an AuthFailed event.
                if (zooKeeperSaslClient != null) {
                    boolean sendAuthEvent = false;
                    if (zooKeeperSaslClient.getSaslState() == ZooKeeperSaslClient.SaslState.INITIAL) {
                        try {
                            zooKeeperSaslClient.initialize(ClientCnxn.this);
                        } catch (SaslException e) {
                           LOG.error("SASL authentication with Zookeeper Quorum member failed: " + e);
                            state = States.AUTH_FAILED;
                            sendAuthEvent = true;
                        }
                    }
                    KeeperState authState = zooKeeperSaslClient.getKeeperState();
                    if (authState != null) {
                        if (authState == KeeperState.AuthFailed) {
                            // An authentication error occurred during authentication with the Zookeeper Server.
                            state = States.AUTH_FAILED;
                            sendAuthEvent = true;
                        } else {
                            if (authState == KeeperState.SaslAuthenticated) {
                                sendAuthEvent = true;
                            }
                        }
                    }

                    if (sendAuthEvent == true) {
                        eventThread.queueEvent(new WatchedEvent(
                              Watcher.Event.EventType.None,
                              authState,null));
                    }
                }
                //读超时-读空闲时间
                to = readTimeout - clientCnxnSocket.getIdleRecv();
            } else {
            	//连接超时-读空闲时间
                to = connectTimeout - clientCnxnSocket.getIdleRecv();
            }
            
            if (to <= 0) {
                throw new SessionTimeoutException(
                        "Client session timed out, have not heard from server in "
                                + clientCnxnSocket.getIdleRecv() + "ms"
                                + " for sessionid 0x"
                                + Long.toHexString(sessionId));
            }
            if (state.isConnected()) {
            	//1000(1 second) is to prevent race condition missing to send the second ping
            	//also make sure not to send too many pings when readTimeout is small 
            	//减少心跳发送次数,以免过多发送ping
                int timeToNextPing = readTimeout / 2 - clientCnxnSocket.getIdleSend() - 
                		((clientCnxnSocket.getIdleSend() > 1000) ? 1000 : 0);
                //send a ping request either time is due or no packet sent out within MAX_SEND_PING_INTERVAL
                //写空闲的时候才发送心跳
                if (timeToNextPing <= 0 || clientCnxnSocket.getIdleSend() > MAX_SEND_PING_INTERVAL) {
                	/**
                	 * 发送ping请求,
                	 * 首次通过queuePacket将请求加入outgoingQueue队列
                	 * 唤醒sendThread.getClientCnxnSocket().wakeupCnxn();-->selector.wakeup();
                	 */
                	sendPing();
                    //最近一次发送时间
                    clientCnxnSocket.updateLastSend();
                } else {
                    if (timeToNextPing < to) {
                        to = timeToNextPing;
                    }
                }
            }

            // If we are in read-only mode, seek for read/write server
            if (state == States.CONNECTEDREADONLY) {
                long now = System.currentTimeMillis();
                int idlePingRwServer = (int) (now - lastPingRwServer);
                if (idlePingRwServer >= pingRwTimeout) {
                    lastPingRwServer = now;
                    idlePingRwServer = 0;
                    pingRwTimeout =
                        Math.min(2*pingRwTimeout, maxPingRwTimeout);
                    pingRwServer();
                }
                to = Math.min(to, pingRwTimeout - idlePingRwServer);
            }

            clientCnxnSocket.doTransport(to, pendingQueue, outgoingQueue, ClientCnxn.this);
        } catch (Throwable e) {
            if (closing) {
                if (LOG.isDebugEnabled()) {
                    // closing so this is expected
                    LOG.debug("An exception was thrown while closing send thread for session 0x"
                            + Long.toHexString(getSessionId())
                            + " : " + e.getMessage());
                }
                break;
            } else {
                // this is ugly, you have a better way speak up
                if (e instanceof SessionExpiredException) {
                    LOG.info(e.getMessage() + ", closing socket connection");
                } else if (e instanceof SessionTimeoutException) {
                    LOG.info(e.getMessage() + RETRY_CONN_MSG);
                } else if (e instanceof EndOfStreamException) {
                    LOG.info(e.getMessage() + RETRY_CONN_MSG);
                } else if (e instanceof RWServerFoundException) {
                    LOG.info(e.getMessage());
                } else {
                    LOG.warn(
                            "Session 0x"
                                    + Long.toHexString(getSessionId())
                                    + " for server "
                                    + clientCnxnSocket.getRemoteSocketAddress()
                                    + ", unexpected error"
                                    + RETRY_CONN_MSG, e);
                }
                cleanup();
                if (state.isAlive()) {
                    eventThread.queueEvent(new WatchedEvent(
                            Event.EventType.None,
                            Event.KeeperState.Disconnected,
                            null));
                }
                clientCnxnSocket.updateNow();
                clientCnxnSocket.updateLastSendAndHeard();
            }
        }
    }
    cleanup();
    clientCnxnSocket.close();
    if (state.isAlive()) {
        eventThread.queueEvent(new WatchedEvent(Event.EventType.None,
                Event.KeeperState.Disconnected, null));
    }
    ZooTrace.logTraceMessage(LOG, ZooTrace.getTextTraceLevel(),
                             "SendThread exitedloop.");
}

服务端处理,首先是LeaderZookeeper的处理链

protected void setupRequestProcessors() {
    RequestProcessor finalProcessor = new FinalRequestProcessor(this);
    RequestProcessor toBeAppliedProcessor = new Leader.ToBeAppliedRequestProcessor(
            finalProcessor, getLeader().toBeApplied);
    commitProcessor = new CommitProcessor(toBeAppliedProcessor,
            Long.toString(getServerId()), false);
    commitProcessor.start();
    ProposalRequestProcessor proposalProcessor = new ProposalRequestProcessor(this,
            commitProcessor);
    proposalProcessor.initialize();
    firstProcessor = new PrepRequestProcessor(this, proposalProcessor);
    ((PrepRequestProcessor)firstProcessor).start();
}

ping请求主要在PrepRequestProcessor和FinalRequestProcessor处理
PrepRequestProcessor中感觉没做啥啊,checkSession()

 case OpCode.ping:
            case OpCode.setWatches:
                zks.sessionTracker.checkSession(request.sessionId,
                        request.getOwner());
                break;

checkSession方法判断sesion是不是close状态,

synchronized public void checkSession(long sessionId, Object owner) throws KeeperException.SessionExpiredException, KeeperException.SessionMovedException {
    SessionImpl session = sessionsById.get(sessionId);
    if (session == null || session.isClosing()) {
        throw new KeeperException.SessionExpiredException();
    }
    if (session.owner == null) {
        session.owner = owner;
    } else if (session.owner != owner) {
        throw new KeeperException.SessionMovedException();
    }
}

session的这个closing会在两种情况下设置为true,是否需要关闭

case OpCode.closeSession:
zks.sessionTracker.setSessionClosing(request.sessionId);

还有一个是在SessionTrackerImpl的run()方法中

synchronized public void run() {
    try {
        while (running) {
            currentTime = System.currentTimeMillis();
            if (nextExpirationTime > currentTime) {
                this.wait(nextExpirationTime - currentTime);
                continue;
            }
            SessionSet set;
            set = sessionSets.remove(nextExpirationTime);
            if (set != null) {
                for (SessionImpl s : set.sessions) {
                	//将session的isClosing状态设置为true
                    setSessionClosing(s.sessionId);
                    /**
                     * submitRequest发送close请求,OpCode.closeSession=-11,
                     */
                    expirer.expire(s);
                }
            }
            nextExpirationTime += expirationInterval;
        }
    } catch (InterruptedException e) {
        LOG.error("Unexpected interruption", e);
    }
    LOG.info("SessionTrackerImpl exited loop!");
}

Zookeeper

public void expire(Session session) {
    long sessionId = session.getSessionId();
    LOG.info("Expiring session 0x" + Long.toHexString(sessionId)
            + ", timeout of " + session.getTimeout() + "ms exceeded");
    close(sessionId);
}

private void close(long sessionId) {
    submitRequest(null, sessionId, OpCode.closeSession, 0, null, null);
}

private void submitRequest(ServerCnxn cnxn, long sessionId, int type,
        int xid, ByteBuffer bb, List<Id> authInfo) {
	//type为OpCode.closeSession==-11,xid为0,这里xid, type有点坑,居然是反的
    Request si = new Request(cnxn, sessionId, xid, type, bb, authInfo);
    submitRequest(si);
}

再来看看FinalRequestProcessor中的处理,回复心跳响应

case OpCode.ping: {
            zks.serverStats().updateLatency(request.createTime);
            lastOp = "PING";
            cnxn.updateStatsForResponse(request.cxid, request.zxid, lastOp,
                    request.createTime, System.currentTimeMillis());
            //发送心跳包响应
            cnxn.sendResponse(new ReplyHeader(-2,
                    zks.getZKDatabase().getDataTreeLastProcessedZxid(), 0), null, "response");
            return;
        }

回到客户端,在 doIO(pendingQueue, outgoingQueue, cnxn);主循环中,调用了sendThread.readResponse(incomingBuffer);来接受响应

sendThread.readResponse(incomingBuffer);

在中,可以看到,当客户端收到心跳(replyHdr.getXid() == -2与服务端发送的Xid一致)是啥也不错了,只是打印log

if (replyHdr.getXid() == -2) {
                // -2 is the xid for pings
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Got ping response for sessionid: 0x"
                            + Long.toHexString(sessionId)
                            + " after "
                            + ((System.nanoTime() - lastPingSentNs) / 1000000)
                            + "ms");
                }
                return;
            }
原文地址:https://www.cnblogs.com/donganwangshi/p/4233538.html