TCP拥塞控制算法内核实现剖析（六）

内核版本：3.2.12

主要源文件：linux-3.2.12/ net/ ipv4/ tcp_highspeed.c

本文主要分析highspeed的实现，作者zhangskd @ csdn
====================================================================================

原理

HighSpeed TCP(HSTCP) uses a generalized AIMD where the linear increase factor and multiplicative

decrease factor are adjusted by a convex function of the current congestion window size. When the

congestion window is less than some cutoff value, HSTCP uses the same factor as TCP. Most of

highspeed TCP variants support this form of TCP compatibility, which is based on the window size.

When the window grows beyond the cutoff point, the convex function increase the increase factor and

reduces the decrease factor proportionally to the window size.

主要改进方面：动态调整AIMD。当cwnd增大时，增加加性因子，减少乘性因子。当拥塞窗口越大时，

增得越快，丢包时降得越少。

数据结构

/* From AIMD tables from RFC 3649 appendix B,
 * with fixed-point MD scaled << 8
 */

static const struct hstcp_aimd_val {
    unsigned int cwnd; /* 区间分隔点*/
    unsigned int md; /* 放大256倍的乘性减小因子*/
} hstcp_aimd_vals[] = {
 {     38,  128, /*  0.50 */ },
 {    118,  112, /*  0.44 */ },
 {    221,  104, /*  0.41 */ },
 {    347,   98, /*  0.38 */ },
 {    495,   93, /*  0.37 */ },
 {    663,   89, /*  0.35 */ },
 {    851,   86, /*  0.34 */ },
 {   1058,   83, /*  0.33 */ },
 {   1284,   81, /*  0.32 */ },
 {   1529,   78, /*  0.31 */ },
 {   1793,   76, /*  0.30 */ },
 {   2076,   74, /*  0.29 */ },
 {   2378,   72, /*  0.28 */ },
 {   2699,   71, /*  0.28 */ },
 {   3039,   69, /*  0.27 */ },
 {   3399,   68, /*  0.27 */ },
 {   3778,   66, /*  0.26 */ },
 {   4177,   65, /*  0.26 */ },
 {   4596,   64, /*  0.25 */ },
 {   5036,   62, /*  0.25 */ },
 {   5497,   61, /*  0.24 */ },
 {   5979,   60, /*  0.24 */ },
 {   6483,   59, /*  0.23 */ },
 {   7009,   58, /*  0.23 */ },
 {   7558,   57, /*  0.22 */ },
 {   8130,   56, /*  0.22 */ },
 {   8726,   55, /*  0.22 */ },
 {   9346,   54, /*  0.21 */ },
 {   9991,   53, /*  0.21 */ },
 {  10661,   52, /*  0.21 */ },
 {  11358,   52, /*  0.20 */ },
 {  12082,   51, /*  0.20 */ },
 {  12834,   50, /*  0.20 */ },
 {  13614,   49, /*  0.19 */ },
 {  14424,   48, /*  0.19 */ },
 {  15265,   48, /*  0.19 */ },
 {  16137,   47, /*  0.19 */ },
 {  17042,   46, /*  0.18 */ },
 {  17981,   45, /*  0.18 */ },
 {  18955,   45, /*  0.18 */ },
 {  19965,   44, /*  0.17 */ },
 {  21013,   43, /*  0.17 */ },
 {  22101,   43, /*  0.17 */ },
 {  23230,   42, /*  0.17 */ },
 {  24402,   41, /*  0.16 */ },
 {  25618,   41, /*  0.16 */ },
 {  26881,   40, /*  0.16 */ },
 {  28193,   39, /*  0.16 */ },
 {  29557,   39, /*  0.15 */ },
 {  30975,   38, /*  0.15 */ },
 {  32450,   38, /*  0.15 */ },
 {  33986,   37, /*  0.15 */ },
 {  35586,   36, /*  0.14 */ },
 {  37253,   36, /*  0.14 */ },
 {  38992,   35, /*  0.14 */ },
 {  40808,   35, /*  0.14 */ },
 {  42707,   34, /*  0.13 */ },
 {  44694,   33, /*  0.13 */ },
 {  46776,   33, /*  0.13 */ },
 {  48961,   32, /*  0.13 */ },
 {  51258,   32, /*  0.13 */ },
 {  53677,   31, /*  0.12 */ },
 {  56230,   30, /*  0.12 */ },
 {  58932,   30, /*  0.12 */ },
 {  61799,   29, /*  0.12 */ },
 {  64851,   28, /*  0.11 */ },
 {  68113,   28, /*  0.11 */ },
 {  71617,   27, /*  0.11 */ },
 {  75401,   26, /*  0.10 */ },
 {  79517,   26, /*  0.10 */ },
 {  84035,   25, /*  0.10 */ },
 {  89053,   24, /*  0.10 */ },
};

#define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals)/*hstcp_aimd_vals[]的元素个数*/

struct hstcp {
    u32 ai; /*加性因子*/
};

可以把hstcp_aimd_vals[]看成一个分段函数，这个分段函数的自变量是cwnd，值为md。

当丢包时，通过查询snd_cwnd所属于的区间，来确定乘性减小因子md。可以看到，当snd_cwnd < 38时，

md = 0.5，这跟Reno相同。随着snd_cwnd的增大，逐渐调小md，这样在BDP网络中不会因为丢包而严重

降低吞吐量。

那么ai是怎么调整的呢？对于确定的snd_cwnd，查找hstcp_aimd_vals[]的元素下标，使得：

hstcp_aimd_vals[ai-1] < snd_cwnd <= hstcp_aimd_vals[ai]，这样就可以确定ai的值了。

highspeed算法中，在拥塞避免阶段每个RTT内，snd_cwnd增加(ai+1)个数据包。所以当ai=0时，snd_cwnd

每个RTT内只增加一个数据包，同Reno。

highspeed算法的思想很简单，只是单纯的根据snd_cwnd动态的调整AIMD。

函数

static void hstcp_init (struct sock *sk)

{
    struct tcp_sock *tp = tcp_sk(sk);
    struct hstcp *ca = inet_csk_ca(sk);

    ca->ai = 0;

    /* Ensure the MD arithmetic works. This is somewhat pedantic,
     * since I don't think we will see a cwnd this large. :) */

    /* 噗！这条注释亮了！哈哈，作者还考虑到运算溢出。
     * 一条连接能占1Tbps左右的带宽吗，明显是想太多了。
     */
    tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);

}

static void hstcp_cong_avoid (struct sock *sk, u32 ack, u32 in_flight)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct hstcp *ca = inet_csk_ca(sk);

    if (!tcp_is_cwnd_limited(sk, in_flight))
        return;

    if (tp->snd_cwnd <= tp->snd_ssthresh)
        tcp_slow_start(tp); /* 慢启动 */

    else { /* 拥塞避免 */
        /* Update AIMD parameters.
        * We want to guarantee that :
        * hstcp_aimd_vals[ca->ai-1].cwnd < snd_cwnd <= hstcp_aimd_vals[ca->ai].cwnd
        * 即根据snd_cwnd更新ai
        */
        if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
          while(tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX-1)
                ca->ai++;

        } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
            while(ca->ai && tp->snd_cwnd <=hstcp_aimd_vals[ca->ai-1].cwnd)
                ca->ai--;
        }

       /* Do additive increase */
       if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
           tp->snd_cwnd_cnt += ca->ai + 1; /* snd_cwnd每个RTT内增加（ai+1）*/
           if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
               tp->snd_cwnd_cnt -= tp->snd_cwnd;
               tp->snd_cwnd++;
           }
       }
}

static u32 hstcp_ssthresh(struct sock *sk)
{
    const struct tcp_sock *tp = tcp_sk(sk);
    const struct hstcp *ca = inet_csk_ca(sk);

    /* Do multiplicative decrease */
    return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md)>>8), 2U);
}

性能

highspeed的特点在于：随着cwnd的增大，它能动态的调大AI，调小MD，这样就可以达到更大的吞吐量。

不足：

“One of the issues with HighSpeed TCP is that of long convergence times between new and old

HighSpeed TCP flows. This can be a particularly severe problem in simplier scnarios(e.g., no web

traffic, no reverse path traffic, no range of round-trip times) with Drop-Tail queues.”

HighSpeed的收敛需要较长的时间，而BIC/CUBIC则具能快速收敛。