速度之王 — LZ4压缩算法（三）

LZ4 (Extremely Fast Compression algorithm)

项目：http://code.google.com/p/lz4/

作者：Yann Collet

本文作者：zhangskd @ csdn blog

实现

(3) 流操作

/* State used by the stream functions below (LZ4_init, LZ4_slideInputBuffer). */
typedef struct {
    U32 hashTable[HASHNBCELLS4]; /* hash table */
    const BYTE *bufferStart; /* start of the input buffer (similar to a look-ahead buffer) */
    const BYTE *base; /* base address (srcBase) that hash table entries are relative to */
    const BYTE *nextBlock; /* address of the next block */
} LZ4_Data_Structure;

/*
 * Put a stream state into its initial condition.
 *
 * lz4ds: state to initialize; its hash table is passed to MEM_INIT with a
 *        fill value of 0 (presumably a memset-style clear - confirm against
 *        the MEM_INIT definition).
 * base:  address stored into bufferStart, base and nextBlock alike.
 */
FORCE_INLINE void LZ4_init(LZ4_Data_Structure *lz4ds, const BYTE *base)
{
    MEM_INIT(lz4ds->hashTable, 0, sizeof(lz4ds->hashTable));

    /* All three pointers start out at the same address. */
    lz4ds->nextBlock = lz4ds->base = lz4ds->bufferStart = base;
}

创建和初始化LZ4_Data_Structure实例。

/*
 * Allocate a LZ4_Data_Structure and initialize it with LZ4_init().
 *
 * inputBuffer: address used as the initial bufferStart / base / nextBlock.
 *
 * Returns the new state as an opaque pointer, or NULL when the allocation
 * fails (assuming ALLOCATOR reports failure with a null pointer, as calloc
 * does - confirm against the ALLOCATOR definition).
 */
void *LZ4_create(const char *inputBuffer)
{
    void *lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure));

    /* Do not let LZ4_init() write through a failed allocation. */
    if (lz4ds == NULL)
        return NULL;

    LZ4_init((LZ4_Data_Structure *)lz4ds, (const BYTE *)inputBuffer);
    return lz4ds;
}

释放LZ4_Data_Structure实例。

/*
 * Release a stream state.
 *
 * LZ4_Data: pointer handed to FREEMEM; presumably the value returned by
 *           LZ4_create() - confirm against callers.
 *
 * Always returns 0.
 */
int LZ4_free(void *LZ4_Data)
{
    FREEMEM(LZ4_Data);
    return 0;
}

当输入缓存不够的时候，进行调整。

/*
 * Slide the input window: copy the 64 KB that precede nextBlock to the start
 * of the buffer and adjust the stream state so that existing hash table
 * entries still resolve to the right bytes.
 *
 * LZ4_Data: stream state, cast to LZ4_Data_Structure *.
 *
 * Returns the updated nextBlock, i.e. where the next block should be placed.
 */
char *LZ4_slideInputBuffer(void *LZ4_Data)
{
    /* 64 KB window; the source spelled this `64KB`, which is not a valid C token. */
    const size_t windowSize = (size_t) 64 * 1024;
    LZ4_Data_Structure *lz4ds = (LZ4_Data_Structure *) LZ4_Data;
    /* Distance by which the data (and therefore every address) moves back. */
    size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + windowSize);

    /*
     * Copy the 64 KB before the next block to the head of the buffer.
     * Both branches below need this copy and neither changes its operands
     * beforehand, so it is done once here.
     * NOTE(review): memcpy requires non-overlapping ranges, which holds only
     * when nextBlock is at least 128 KB past bufferStart - confirm the caller
     * guarantees this.
     */
    memcpy((void *)(lz4ds->bufferStart), (const void *)(lz4ds->nextBlock - windowSize), windowSize);

    if ((lz4ds->base - delta > lz4ds->base) ||                       /* underflow control */
        ((size_t) (lz4ds->nextBlock - lz4ds->base) > 0xE0000000))    /* close to 32-bit limit */
    {
        /*
         * base cannot simply be moved back, so rebase the hash table instead:
         * entries that point before the retained window are reset to 0, the
         * others are shifted so they are relative to the new base.
         */
        size_t deltaLimit = (lz4ds->nextBlock - windowSize) - lz4ds->base;
        int nH;
        for (nH = 0; nH < HASHNBCELLS4; nH++) {
            if ((size_t) (lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0;
            else lz4ds->hashTable[nH] -= (U32) deltaLimit;
        }

        lz4ds->base = lz4ds->bufferStart;
        lz4ds->nextBlock = lz4ds->base + windowSize;

    } else {
        lz4ds->nextBlock -= delta; /* new address of the next block */
        /*
         * Hash table values are offsets: pos = base + offset, where pos is
         * the real address. The data moved back by delta, so every offset
         * would have to shrink by delta. Moving base back by delta instead
         * yields the correct pos without touching the hash table.
         */
        lz4ds->base -= delta;
    }

    return (char *) (lz4ds->nextBlock);
}

(4) 解压

LZ4_decompress_generic()是通用的解压算法，只要符合压缩格式就可以解压，无需考虑匹配算法。

/* Values for the endOnInput argument of LZ4_decompress_generic(). */
typedef enum {
    endOnOutputSize = 0,
    endOnInputSize = 1
} endCondition_directive;

/* Values for the partialDecoding argument of LZ4_decompress_generic(). */
typedef enum {
    full = 0,
    partial = 1
} earlyEnd_directive;

If endOnInput == endOnInputSize, outputSize is the max size of Output Buffer.
targetOutputSize is only used if partialDecoding == partial.

FORCE_INLINE int LZ4_decompress_generic( const char *source, char *dest, int inputSize,
    int outputSize, int endOnInput, int prefix64k, int partialDecoding, int targetOutputSize)
{
    /* Local variables */
    const BYTE *restrict ip = (const BYTE *) source;
    const BYTE *ref;
    const BYTE *const iend = ip + inputSize;

    BYTE *op = (BYTE *) dest;
    BYTE *const oend = op + outputSize;
    BYTE *cpy;
    BYTE *oexit = op + targetOutputSize;

    /* static reduces speed for LZ4_compress_safe() on GCC64. */
    const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
    static const size_t dec64table[] = {0, 0, 0, (size_t) - 1, 0, 1, 2, 3};

    /* Special cases */
    /* targetOutputSize too high => decode everything */
    if ((partialDecoding) && (oexit > oend - MFLIMIT)) oexit = oend - MFLIMIT;
    /* Empty output buffer */
    if ((endOnInput) && unlikey(outputSize == 0)) return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
    if ((! endOnInput) && unlikely(outputSize == 0)) return (*ip == 0 ? 1 : -1);

    /* Main loop，每次循环解压一个序列 */
    while(1) {
        unsigned token;
        size_t length;

        /* get runlength，获取literal length */
        token = *ip++;

        if ((length = (token >> ML_BITS)) == RUN_MASK) {
            unsigned s = 255;
            while (((endOnInput) ? ip < iend : 1) && (s == 255)) {
                s = *ip++;
                length += s;
            }
        }

        /* copy literals */
        cpy = op + length;

        if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) || 
            (ip + length > iend - (2+1+LASTLITERALS))))
            || ((! endOnInput) && (cpy > oend - COPYLENGTH)))
        {
            if (partialDecoding) {
                if (cpy > oend) goto _output_error; /* write attempt beyond end of output buffer */
                if ((endOnInput) && (ip + length > iend)) goto _out_error; /* read attempt beyond end of input buffer */

            } else {
                if ((! endOnInput) && (cpy != oend)) goto _output_error; /* block decoding must stop exactly there */
                if ((endOnInput) && ((ip + length != iend) || (cpy > oend))) goto _output_error; /* input must be consumed */
            }

            memcpy(op, ip, length);
            ip += length;
            op += length;
            break; /* 注意，这里是退出口 */
        }
        LZ4_WILDCOPY(op, ip, cpy); ip -= (op - cpy); op = cpy; /* 拷贝literals */
 
        /* get offset，获取偏移值 */
        LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); ip += 2;

        /* offset outside destination buffer */
        if ((prefix64k == noPrefix) && unlikely(ref < (BYTE *const) dest)) goto _output_error;

        /* get matchlength，获取match length */
        if ((length = (token & ML_MASK)) == ML_MASK) {
            /* Ensure enough bytes remain for LASTLITERALS + token */
            while((! endOnInput) || (ip < iend - (LASTLITERALS + 1))) {
                unsigned s = *ip++;
                length += s;
                if (s == 255) continue;
                break;
            }
        }

        /* copy repeated sequence，拷贝匹配match */
        if unlikely((op - ref) < (int) STEPSIZE)  { /* 匹配和自身重叠的情况，拷贝STEPSIZE大小 */
            const size_t dec64 = dec64table[(sizeof(void *) == 4 ? 0 : op - ref];
            op[0] = ref[0]; op[1] = ref[1]; op[2] = ref[2]; op[3] = ref[3];
            op += 4; ref += 4;
            ref -= dec32table[op - ref];
            A32(op) = A32(ref);
            op += STEPSIZE - 4; ref -= dec64;
        } else
            LZ4_COPYSTEP(op, ref);

        cpy = op + length - STEPSIZE + 4; /* match length + 4才是实际match大小*/ 

        if unlikely(cpy > oend - COPYLENGTH - (STEPSIZE - 4)) {
            if (cpy > oend - LASTLITERALS) goto _output_error; /* last 5 bytes must be literals */
            LZ4_SECURECOPY(op, ref, (oend - COPYLENGTH));
            while(op < cpy) *op++ = *ref++;
            op = cpy;
            continue;
        }

        LZ4_WILDCOPY(op, ref, cpy);
        op = cpy; /* correction */
    }

    /* end of decoding */
    if (endOnInput)
        return (int) (((char *)op) - dest); /* Nb of output bytes decoded，解压得到了多少字符 */
    else
        return (int) (((char *)op) - source); /* Nb of input bytes read，读了多少个压缩字符 */

/* Overflow error detected */
_output_error:
    return (int) (-(((char *) ip) - source)) - 1; /* 负号表示出错，值表示Nb of input bytes read */
}

符合以下任一条件退出：

1. endOnInputSize

1.1 partial、cpy > oexit。出错情况：cpy > oend，或ip + length > iend。

1.2 full、cpy > oend - 12、ip + length == iend。出错情况：ip + length != iend，或cpy > oend。

1.3 ip + length > iend - 2 - (1 + 5)

1.3.1 partial。出错情况：cpy > oend，或ip + length > iend。

1.3.2 full、ip + length == iend。出错情况：ip + length != iend，或cpy > oend。

2. endOnOutputSize

2.1 cpy > oend - 8。

2.1.1 partial。出错情况：cpy > oend。

2.1.2 full、cpy == oend。出错情况：cpy != oend。

LZ4使用

make / make clean

得到可执行程序：lz4、lz4c

Usage:

./lz4 [arg] [input] [output]

input : a filename

Arguments :

-1 : Fast compression (default)

-9 : High compression

-d : decompression (default for .lz4 extension)

-z : force compression

-f : overwrite output without prompting

-h/-H : display help/long help and exit

LZ4的输入只能为文件，不能为文件夹，毕竟一般压缩工具都不提供tar功能的。

-b file1 [file2] 可以用来测量压缩和解压速度。

比较遗憾的是，没有看到可以指定线程数的参数，所以接下来没有测试多线程环境下的效果。

LZ4测试

Xeon E5504 @ 2.00GHz，x86_64，8核CPU，测试时只用了其中一个核。

(1) 速度

可以看到压缩速度和解压速度都很快，而且对日志文件的压缩比相当高。

(2) 压缩比

原始文件为linux-3.6.10.tar，大小为467MB。

用gzip压缩后为linux-3.6.10.tar.gz，大小为101MB，压缩比为21.62%。

用bzip2压缩后为linux-3.6.10.tar.bz2，大小为79MB，压缩比为16.91%。

用lz4压缩后为linux-3.6.10.tar.lz4，大小为166MB，压缩比为35.38%。

用lz4_HC压缩后为linux-3.6.10.tar.lz4，大小为117MB，压缩比为25.03%。

可以看到在压缩效果上（压缩比 = 压缩后大小 / 原始大小，数值越小压缩效果越好）：lz4 < lz4_HC < gzip < bzip2。

然而在压缩过程中，笔者可以感觉到lz4的压缩时间比其它的要少一个数量级，几乎是瞬间完成：）

所以LZ4的优势在于压缩和解压速度，而不是压缩比。