Linux内核源代码解析——TCP状态转移图以及其实现

本文为freas_1990原创,转载请标明出处:http://blog.csdn.net/freas_1990/article/details/10223581

TCP状态转移的原理并不高深,但是处理逻辑比较复杂。以下是TCP状态转移图,出自《TCP/IP协议详解:卷2》(W. Richard Stevens)。


这些状态是怎么实现的呢?

我们来看一下内核源代码(server端部分)。

int
tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
	unsigned long daddr, unsigned short len,
	unsigned long saddr, int redo, struct inet_protocol * protocol)
{
  struct tcphdr *th;
  struct sock *sk;

  if (!skb) {
	DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL
"));
	return(0);
  }
#if 0	/* FIXME: it's ok for protocol to be NULL */
  if (!protocol) {
	DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL
"));
	return(0);
  }

  if (!opt) {	/* FIXME: it's ok for opt to be NULL */
	DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL
"));
  }
#endif
  if (!dev) {
	DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL
"));
	return(0);
  }
  th = skb->h.th;

  /* Find the socket. */
  sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
  DPRINTF((DBG_TCP, "<<
"));
  DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X
", len, redo, skb));
  
  /* If this socket has got a reset its to all intents and purposes 
     really dead */
  if (sk!=NULL && sk->zapped)
	sk=NULL;

  if (sk) {
	 DPRINTF((DBG_TCP, "sk = %X:
", sk));
  }

  if (!redo) {
	if (tcp_check(th, len, saddr, daddr )) {
		skb->sk = NULL;
		DPRINTF((DBG_TCP, "packet dropped with bad checksum.
"));
if (inet_debug == DBG_SLIP) printk("
tcp_rcv: bad checksum
");
		kfree_skb(skb,FREE_READ);
		/*
		 * We don't release the socket because it was
		 * never marked in use.
		 */
		return(0);
	}

	th->seq = ntohl(th->seq);

	/* See if we know about the socket. */
	if (sk == NULL) {
		if (!th->rst)
			tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
		skb->sk = NULL;
		kfree_skb(skb, FREE_READ);
		return(0);
	}

	skb->len = len;
	skb->sk = sk;
	skb->acked = 0;
	skb->used = 0;
	skb->free = 0;
	skb->saddr = daddr;
	skb->daddr = saddr;

	/* We may need to add it to the backlog here. */
	cli();
	if (sk->inuse) {
		if (sk->back_log == NULL) {
			sk->back_log = skb;
			skb->next = skb;
			skb->prev = skb;
		} else {
			skb->next = sk->back_log;
			skb->prev = sk->back_log->prev;
			skb->prev->next = skb;
			skb->next->prev = skb;
		}
		sti();
		return(0);
	}
	sk->inuse = 1;
	sti();
  } else {
	if (!sk) {
		DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1
"));
		return(0);
	}
  }

  if (!sk->prot) {
	DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL 
"));
	return(0);
  }

  /* Charge the memory to the socket. */
  if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
	skb->sk = NULL;
	DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.
"));
	kfree_skb(skb, FREE_READ);
	release_sock(sk);
	return(0);
  }
  sk->rmem_alloc += skb->mem_len;

  DPRINTF((DBG_TCP, "About to do switch.
"));

  /* Now deal with it. */
  switch(sk->state) {
	/*
	 * This should close the system down if it's waiting
	 * for an ack that is never going to be sent.
	 */
	case TCP_LAST_ACK:
		if (th->rst) {
			sk->zapped=1;
			sk->err = ECONNRESET;
 			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
			if (!sk->dead) {
				sk->state_change(sk);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

	case TCP_ESTABLISHED:
	case TCP_CLOSE_WAIT:
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
	case TCP_TIME_WAIT:
		if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
if (inet_debug == DBG_SLIP) printk("
tcp_rcv: not in seq
");
#ifdef undef
/* nice idea, but tcp_sequence already does this.  Maybe it shouldn't?? */
			if(!th->rst)
				tcp_send_ack(sk->sent_seq, sk->acked_seq, 
				     sk, th, saddr);
#endif
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (th->rst) {
			sk->zapped=1;
			/* This means the thing should really be closed. */
			sk->err = ECONNRESET;

			if (sk->state == TCP_CLOSE_WAIT) {
				sk->err = EPIPE;
			}

			/*
			 * A reset with a fin just means that
			 * the data was not all read.
			 */
			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
			if (!sk->dead) {
				sk->state_change(sk);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}
		if (
#if 0
		if ((opt && (opt->security != 0 ||
			    opt->compartment != 0)) || 
#endif
				 th->syn) {
			sk->err = ECONNRESET;
			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
			tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
			if (!sk->dead) {
				sk->state_change(sk);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (th->ack && !tcp_ack(sk, th, saddr, len)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (tcp_urg(sk, th, saddr, len)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (tcp_data(skb, sk, saddr, len)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		/* Moved: you must do data then fin bit */
		if (th->fin && tcp_fin(sk, th, saddr, dev)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		release_sock(sk);
		return(0);

	case TCP_CLOSE:
		if (sk->dead || sk->daddr) {
			DPRINTF((DBG_TCP, "packet received for closed,dead socket
"));
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (!th->rst) {
			if (!th->ack)
				th->ack_seq = 0;
			tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
		}
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return(0);

	case TCP_LISTEN:
		if (th->rst) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}
		if (th->ack) {
			tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (th->syn) {
#if 0
			if (opt->security != 0 || opt->compartment != 0) {
				tcp_reset(daddr, saddr, th, prot, opt,dev);
				release_sock(sk);
				return(0);
			}
#endif

			/*
			 * Now we just put the whole thing including
			 * the header and saddr, and protocol pointer
			 * into the buffer.  We can't respond until the
			 * user tells us to accept the connection.
			 */
			tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
			release_sock(sk);
			return(0);
		}

		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return(0);

	default:
		if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

	case TCP_SYN_SENT:
		if (th->rst) {
			sk->err = ECONNREFUSED;
			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
			sk->zapped = 1;
			if (!sk->dead) {
				sk->state_change(sk);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}
#if 0
		if (opt->security != 0 || opt->compartment != 0) {
			sk->err = ECONNRESET;
			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
			tcp_reset(daddr, saddr,  th, sk->prot, opt, dev);
			if (!sk->dead) {
				wake_up_interruptible(sk->sleep);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}
#endif
		if (!th->ack) {
			if (th->syn) {
				sk->state = TCP_SYN_RECV;
			}

			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		switch(sk->state) {
			case TCP_SYN_SENT:
				if (!tcp_ack(sk, th, saddr, len)) {
					tcp_reset(daddr, saddr, th,
							sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}

				/*
				 * If the syn bit is also set, switch to
				 * tcp_syn_recv, and then to established.
				 */
				if (!th->syn) {
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}

				/* Ack the syn and fall through. */
				sk->acked_seq = th->seq+1;
				sk->fin_seq = th->seq;
				tcp_send_ack(sk->sent_seq, th->seq+1,
							sk, th, sk->daddr);
	
			case TCP_SYN_RECV:
				if (!tcp_ack(sk, th, saddr, len)) {
					tcp_reset(daddr, saddr, th,
							sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
				sk->state = TCP_ESTABLISHED;

				/*
				 * Now we need to finish filling out
				 * some of the tcp header.
				 */
				/* We need to check for mtu info. */
				tcp_options(sk, th);
				sk->dummy_th.dest = th->source;
				sk->copied_seq = sk->acked_seq-1;
				if (!sk->dead) {
					sk->state_change(sk);
				}

				/*
				 * We've already processed his first
				 * ack.  In just about all cases that
				 * will have set max_window.  This is
				 * to protect us against the possibility
				 * that the initial window he sent was 0.
				 * This must occur after tcp_options, which
				 * sets sk->mtu.
				 */
				if (sk->max_window == 0) {
				  sk->max_window = 32;
				  sk->mss = min(sk->max_window, sk->mtu);
				}

				/*
				 * Now process the rest like we were
				 * already in the established state.
				 */
				if (th->urg) {
					if (tcp_urg(sk, th, saddr, len)) { 
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}
			}
			if (tcp_data(skb, sk, saddr, len))
						kfree_skb(skb, FREE_READ);

			if (th->fin) tcp_fin(sk, th, saddr, dev);
			release_sock(sk);
			return(0);
		}

		if (th->urg) {
			if (tcp_urg(sk, th, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
		}

		if (tcp_data(skb, sk, saddr, len)) {
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);
		}

		if (!th->fin) {
			release_sock(sk);
			return(0);
		}
		tcp_fin(sk, th, saddr, dev);
		release_sock(sk);
		return(0);
	}
}






原文地址:https://www.cnblogs.com/james1207/p/3278506.html