setsockopt的TCP层实现剖析

应用层

NAME

    setsockopt - set options on sockets

SYNOPSIS

    #include <sys/types.h>

    #include <sys/socket.h>

    int setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen);

EXAMPLE

    自定义一个TCP层Socket选项:TCP_MAX_CWND。

    /* Pass the value 1 for the custom TCP_MAX_CWND option at the SOL_TCP level. */
    int one = 1;

    setsockopt(sockfd, SOL_TCP, TCP_MAX_CWND, &one, sizeof(one));

来看一下通用的TCP层Socket选项:

@netinet/tcp.h:

/* User-settable options (used with setsockopt). */
#define TCP_NODELAY    1            /* Don't delay send to coalesce packets */
#define TCP_MAXSEG    2           /* Set Maximum segment size */
#define TCP_CORK    3           /* Control sending of partial frames */
#define TCP_KEEPIDLE    4        /* Start keepalives after this period */
#define TCP_KEEPINTVL    5        /* Interval between keepalives */
#define TCP_KEEPCNT    6        /* Number of keepalives before death */
#define TCP_SYNCNT    7        /* Number of SYN retransmits */
#define TCP_LINGER2    8        /* Life time of orphaned FIN_WAIT2 state */
#define TCP_DEFER_ACCEPT    9        /* Wake up listener only when data arrive */ 
#define TCP_WINDOW_CLAMP    10        /* Bound advertised window */
#define TCP_INFO    11        /* Information about this connection. Note: this option cannot be set; it can only be read. */
#define TCP_QUICKACK    12        /* Block/reenable quick ACKs */ 

Linux除了支持以上通用的TCP层Socket选项,还支持一些它特有的选项(较新的版本中又多了一些):

@linux/tcp.h:

/* TCP socket options */
#define TCP_NODELAY    1        /* Turn off Nagle's algorithm. */
#define TCP_MAXSEG    2        /* Limit MSS */
#define TCP_CORK    3        /* Never send partially complete segments */
#define TCP_KEEPIDLE    4        /* Start keepalives after this period */
#define TCP_KEEPINTVL    5        /* Interval between keepalives */
#define TCP_KEEPCNT    6        /* Number of keepalives before death */
#define TCP_SYNCNT    7        /* Number of SYN retransmits */
#define TCP_LINGER2    8        /* Life time of orphaned FIN_WAIT2 state */
#define TCP_DEFER_ACCEPT 9        /* Wake up listener only when data arrive */
#define TCP_WINDOW_CLAMP    10        /* Bound advertised window */
#define TCP_INFO    11        /* Information about this connection. */
#define TCP_QUICKACK    12        /* Block/reenable quick acks */
#define TCP_CONGESTION    13        /* Congestion control algorithm */
#define TCP_MD5SIG    14        /* TCP MD5 Signature (RFC2385) */
#define TCP_COOKIE_TRANSACTIONS    15        /* TCP Cookie Transactions */
#define TCP_THIN_LINEAR_TIMEOUTS    16        /* Use linear timeouts for thin streams */
#define TCP_THIN_DUPACK    17        /* Fast retrans. after 1 dupack */

函数关系

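函数调用关系图如下(原图缺失;由下文代码可知:tcp_prot.setsockopt 指向 tcp_setsockopt(),当 level 为 SOL_TCP 时它调用 do_tcp_setsockopt(),否则转交给 icsk->icsk_af_ops->setsockopt()):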

数据结构

/*
 * Protocol operations table for TCP (excerpt: "..." marks members that
 * the article leaves out). The members shown here name the protocol and
 * hook up the TCP-level setsockopt/getsockopt handlers.
 */
struct proto tcp_prot = {
    .name = "TCP",
    .owner = THIS_MODULE,
    ...
    .setsockopt = tcp_setsockopt, /* handler discussed in this article */
    .getsockopt = tcp_getsockopt, /* read-side counterpart */
    ...
};

函数实现

/*
 * tcp_setsockopt - TCP entry point for setting a socket option.
 *
 * Options at level SOL_TCP are processed by do_tcp_setsockopt(); every
 * other level is delegated to the setsockopt handler found in the
 * connection socket's icsk_af_ops table.
 *
 * Returns whatever the selected handler returns.
 */
int tcp_setsockopt (struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
{
    struct inet_connection_sock *conn = inet_csk(sk);

    if (level == SOL_TCP)
        return do_tcp_setsockopt(sk, level, optname, optval, optlen);

    /* Not a TCP-level option: pass it on to the handler in icsk_af_ops. */
    return conn->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen);
}
/*
 * do_tcp_setsockopt - apply one TCP-level socket option to sk.
 *
 * This is an excerpt: "..." marks code that the article leaves out.
 *
 * TCP_CONGESTION takes a string and is handled (and returns) in the first
 * switch. Every remaining option shown here takes an int: optlen must be at
 * least sizeof(int), the value is read from user space into val, and the
 * option is then applied in the second switch with the socket locked.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a bad length or an
 * out-of-range value, -EFAULT when the user buffer cannot be read,
 * -ENOPROTOOPT for an option name with no matching case, or the result of
 * tcp_set_congestion_control() for TCP_CONGESTION.
 */
static int do_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct inet_connection_sock *icsk = inet_csk(sk);
    int val;
    int err = 0;

    /* These are data/string values, all the others are ints */
    switch (optname) {
        /* Select the TCP congestion control algorithm this connection will use */
        case TCP_CONGESTION: { 
            char name[TCP_CA_NAME_MAX];

            if (optlen < 1)
                return -EINVAL;

            val = strncpy_from_user(name, optval,  
                         min_t(long, TCP_CA_NAME_MAX - 1, optlen)); /* at most TCP_CA_NAME_MAX - 1 bytes are copied, so the name should not exceed that (15 bytes per the original note) */

            if (val < 0)
                return -EFAULT;
            name[val] = 0; /* terminate the copied name at index val */

            /* Switch the algorithm with the socket locked. */
            lock_sock(sk);
            err = tcp_set_congestion_control(sk, name);
            release_sock(sk);

            return err;
        }

        case TCP_COOKIE_TRANSACTIONS: {
            ...
        }

        default:
            break; /* fallthru */
    }
 
    if (optlen < sizeof(int))
        return -EINVAL; /* -22, Invalid argument */

    if (get_user(val, (int __user *) optval)) /* fetch the int value from user space */
        return -EFAULT; /* -14, Bad address */

    lock_sock(sk); 

    switch(optname) {
        case TCP_MAXSEG:
            /* Values greater than interface MTU won't take effect. However at the point
             * when this call is done we typically don't yet know which interface is going to be used */
            if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { /* must lie between 88 and 32767 (values per the original note) */
                err = -EINVAL;
                break;
            }
            tp->rx_opt.user_mss = val; /* from now on neither the local nor the peer MSS will exceed this value */
            break;
        ...
        case TCP_WINDOW_CLAMP:
            if (! val) {
                /* A zero clamp is only accepted while the socket is in TCP_CLOSE. */
                if (sk->sk_state != TCP_CLOSE) {
                    err = -EINVAL;
                    break;
                }
                tp->window_clamp = 0; /* tp->window_clamp: Maximal window to advertise */

            } else /* smallest clamp applied: SOCK_MIN_RCVBUF / 2, i.e. (2048 + sizeof(struct sk_buff)) / 2 per the original note */
                tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? SOCK_MIN_RCVBUF/2 : val;
            break;
        ...
        case TCP_THIN_DUPACK:
            /* Boolean option: only 0 and 1 are accepted. */
            if (val < 0 || val > 1)
                err = -EINVAL;
            else
                tp->thin_dupack = val;
            break;

        case TCP_MAX_CWND:   // custom option added by the article
            if (val < 0)
                err = -EINVAL;
            else
                tp->snd_cwnd_clamp = val; /* change max value of snd_cwnd */
            break;
        ...
        default:
            err = -ENOPROTOOPT; /* -92, protocol has no such option */
    }

    release_sock(sk);
    return err;
}

Author

zhangskd @ csdn blog

原文地址:https://www.cnblogs.com/aiwz/p/6333344.html