Linux内核源代码解析——用户发送数据包的起源之sendto

本文原创为freas_1990,转载请标明出处:http://blog.csdn.net/freas_1990/article/details/10162853


Jack:我想知道用户如何把数据发送到内核空间的?

我:你觉得哪里比较难理解呢?

Jack:一般程序员会在程序里通过socket变量获得一个文件描述符,然后通过write把定义好的字符串写入到该描述符。

我:是的。你有什么不明白的吗?

Jack:可是,我不知道这个write底层到底会做什么。

我:这个write底层会调用sock_send函数。我给你看一下这个函数的定义。

/*
 * Send LEN bytes starting at BUFF on the socket referred to by the file
 * descriptor FD.
 *
 * Returns -EBADF when FD is out of range or has no open file in the
 * current process, -ENOTSOCK when sockfd_lookup() yields no socket for
 * FD, and otherwise whatever the socket's ops->send handler returns.
 */
static int
sock_send(int fd, void * buff, int len, unsigned flags)
{
  struct socket *sock;
  struct file *file;

  DPRINTF((net_debug,
	"NET: sock_send(fd = %d, buff = %X, len = %d, flags = %X)\n",
	fd, buff, len, flags));

  /* The descriptor must index an open file of the current process. */
  if (fd < 0 || fd >= NR_OPEN || ((file = current->filp[fd]) == NULL))
	return(-EBADF);
  if (!(sock = sockfd_lookup(fd, NULL)))
	return(-ENOTSOCK);

  /*
   * The non-blocking argument is derived from the file's open flags
   * (O_NONBLOCK); FLAGS is passed through to the handler unchanged.
   */
  return(sock->ops->send(sock, buff, len, (file->f_flags & O_NONBLOCK), flags));
}

sock_send函数通过用户传入的socket描述符fd找到对应的struct socket结构(sock),然后把sock、buff(这是一个用户空间的逻辑地址)、len、由文件标志得到的非阻塞标志(file->f_flags & O_NONBLOCK)以及flags一起传给sock->ops->send所指向的下层函数。

最后一个return语句通过函数指针完成调用(这就是函数指针的妙处!)。如果下面对应的传输层协议是UDP,最终就会调用到udp_write:

/*
 * Write LEN bytes from BUFF on SK without naming a destination.
 *
 * This is a thin wrapper: it forwards to udp_sendto() with a NULL
 * address and a zero address length, and returns its result.
 */
static int
udp_write(struct sock *sk, unsigned char *buff, int len, int noblock,
	  unsigned flags)
{
  struct sockaddr_in *no_dest = NULL;	/* no explicit destination address */
  int no_dest_len = 0;

  return udp_sendto(sk, buff, len, noblock, flags, no_dest, no_dest_len);
}

udp_write其实只是一个包裹函数,真正干活儿的是udp_sendto。

static int
udp_sendto(struct sock *sk, unsigned char *from, int len, int noblock,
   unsigned flags, struct sockaddr_in *usin, int addr_len)
{
  struct sockaddr_in sin;
  int tmp;
  int err;


  DPRINTF((DBG_UDP, "UDP: sendto(len=%d, flags=%X)
", len, flags));


  /* Check the flags. */
  if (flags) 
  return(-EINVAL);
  if (len < 0) 
  return(-EINVAL);
  if (len == 0) 
  return(0);


  /* Get and verify the address. */
  if (usin) {
if (addr_len < sizeof(sin)) return(-EINVAL);
err=verify_area(VERIFY_READ, usin, sizeof(sin));
if(err)
return err;
memcpy_fromfs(&sin, usin, sizeof(sin));
if (sin.sin_family && sin.sin_family != AF_INET) 
return(-EINVAL);
if (sin.sin_port == 0) 
return(-EINVAL);
  } else {
if (sk->state != TCP_ESTABLISHED) return(-EINVAL);
sin.sin_family = AF_INET;
sin.sin_port = sk->dummy_th.dest;
sin.sin_addr.s_addr = sk->daddr;
  }
  
  if(!sk->broadcast && chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST)
    return -EACCES;/* Must turn broadcast on first */
  sk->inuse = 1;


  /* Send the packet. */
  tmp = udp_send(sk, &sin, from, len);


  /* The datagram has been sent off.  Release the socket. */
  release_sock(sk);
  return(tmp);
}

这其实也是一个包裹函数,真正干活的是udp_send函数。

static int
udp_send(struct sock *sk, struct sockaddr_in *sin,
	 unsigned char *from, int len)
{
  struct sk_buff *skb;
  struct device *dev;
  struct udphdr *uh;
  unsigned char *buff;
  unsigned long saddr;
  int size, tmp;
  int err;
  
  DPRINTF((DBG_UDP, "UDP: send(dst=%s:%d buff=%X len=%d)
",
		in_ntoa(sin->sin_addr.s_addr), ntohs(sin->sin_port),
		from, len));

  err=verify_area(VERIFY_READ, from, len);
  if(err)
  	return(err);

  /* Allocate a copy of the packet. */
  size = sizeof(struct sk_buff) + sk->prot->max_header + len;
  skb = sk->prot->wmalloc(sk, size, 0, GFP_KERNEL);
  if (skb == NULL) return(-ENOMEM);

  skb->mem_addr = skb;
  skb->mem_len  = size;
  skb->sk       = NULL;	/* to avoid changing sk->saddr */
  skb->free     = 1;
  skb->arp      = 0;

  /* Now build the IP and MAC header. */
  buff = skb->data;
  saddr = 0;
  dev = NULL;
  DPRINTF((DBG_UDP, "UDP: >> IP_Header: %X -> %X dev=%X prot=%X len=%d
",
			saddr, sin->sin_addr.s_addr, dev, IPPROTO_UDP, skb->mem_len));
  tmp = sk->prot->build_header(skb, saddr, sin->sin_addr.s_addr,
			       &dev, IPPROTO_UDP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
  skb->sk=sk;	/* So memory is freed correctly */
			    
  if (tmp < 0 ) {
	sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
	return(tmp);
  }
  buff += tmp;
  saddr = dev->pa_addr;
  DPRINTF((DBG_UDP, "UDP: >> MAC+IP len=%d
", tmp));

  skb->len = tmp + sizeof(struct udphdr) + len;	/* len + UDP + IP + MAC */
  skb->dev = dev;
#ifdef OLD
  /*
   * This code used to hack in some form of fragmentation.
   * I removed that, since it didn't work anyway, and it made the
   * code a bad thing to read and understand. -FvK
   */
  if (len > dev->mtu) {
#else
  if (skb->len > 4095)
  {
#endif    
	printk("UDP: send: length %d > mtu %d (ignored)
", len, dev->mtu);
	sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
	return(-EMSGSIZE);
  }

  /* Fill in the UDP header. */
  uh = (struct udphdr *) buff;
  uh->len = htons(len + sizeof(struct udphdr));
  uh->source = sk->dummy_th.source;
  uh->dest = sin->sin_port;
  buff = (unsigned char *) (uh + 1);

  /* Copy the user data. */
  memcpy_fromfs(buff, from, len);

  /* Set up the UDP checksum. */
  udp_send_check(uh, saddr, sin->sin_addr.s_addr, skb->len - tmp, sk);

  /* Send the datagram to the interface. */
  sk->prot->queue_xmit(sk, dev, skb, 1);

  return(len);
}

这个函数里真正干活的是memcpy_fromfs函数,执行完了这个函数,数据就已经从用户空间拷贝到内核空间了。

之后的sk->prot->queue_xmit(sk, dev, skb, 1);通过函数指针把sk上的skb这个数据包排入发送队列。




原文地址:https://www.cnblogs.com/pangblog/p/3275603.html