GZipStream vs SharpZipLib

最近项目需要用到压缩,以前1.1的时候都用的第3方库,这次想尝试一下2.0带的 Compression库。
一用发现这个库压的东西偏大,而且没文件名没时间。

那么到底是为什么会这样呢,难道是我的使用有问题?

决定要搞搞明白,于是有了下面这段代码,用SharpZipLib和2.0自带的库做下对比,大家都采用Gzip压缩

使用2.0的库做压缩

        static MemoryStream Deflate(byte[] data)
        
{
            MemoryStream memoryStream 
= new MemoryStream();

            
using (GZipStream gzip = new GZipStream(memoryStream, CompressionMode.Compress, true))
            
{
                gzip.Write(data, 
0, data.Length);
                gzip.Flush();
                gzip.Close();
            }


            
return memoryStream;
        }


使用SharpZipLib压缩

        static MemoryStream DeflateUseSharpZipLib(byte[] data)
        
{
            MemoryStream memoryStream 
= new MemoryStream();

            
using (GZipOutputStream outStream = new GZipOutputStream(memoryStream))
            
{
                outStream.IsStreamOwner 
= false;
                outStream.Write(data, 
0, data.Length);
                outStream.Flush();
                outStream.Finish();
            }

            
return memoryStream;
        }


ok,之后看看我们的测试文件   1.pdf 3358kb

使用System.IO.Compression
testzip 1.pdf 0.zip 0
使用ICSharpCode.SharpZipLib.GZip
testzip 1.pdf 1.zip 1 

压缩后大小:
0.zip 2499kb(System)
1.zip 1612kb(SharpZipLib)

可以看到 ICSharpCode.SharpZipLib.GZip 压缩后1612kb,C# System.IO.Compression.GZipStream 压缩后大小2499kb

而且System.IO.Compression.GZipStream压缩的文件没文件名,时间也是不对的

那么是什么原因呢?
那就让我们跟踪看看

System.IO.Compression.GZipStream的实现

GZipStream类

        public override void Write(byte[] array, int offset, int count)
        
{
            
if (this.deflateStream == null)
            
{
                
throw new ObjectDisposedException(null, SR.GetString("ObjectDisposed_StreamClosed"));
            }

            
this.deflateStream.Write(array, offset, count);
        }


DeflateStream类

        public override void Write(byte[] array, int offset, int count)
        
{
            
this.EnsureCompressionMode();
            
this.ValidateParameters(array, offset, count);
            
this.InternalWrite(array, offset, count, false);
        }


        
/// <summary>
/// Feeds <paramref name="count"/> bytes of <paramref name="array"/> (from
/// <paramref name="offset"/>) to the deflater and writes every chunk of compressed output it
/// produces to the underlying stream.
/// </summary>
/// <param name="array">Source buffer handed to the deflater.</param>
/// <param name="offset">Start index within <paramref name="array"/>.</param>
/// <param name="count">Number of bytes to hand to the deflater.</param>
/// <param name="isAsync">
/// When true, output is written with BeginWrite/EndWrite instead of Write.
/// </param>
internal void InternalWrite(byte[] array, int offset, int count, bool isAsync)
{
    // Drain anything the deflater has not yet emitted before handing it new input.
    this.WritePendingDeflateOutput(isAsync);

    this.deflater.SetInput(array, offset, count);

    // Then emit everything the new input produces, until the deflater asks for more.
    this.WritePendingDeflateOutput(isAsync);
}

/// <summary>
/// Pulls compressed output from the deflater into the internal buffer and writes it to the
/// underlying stream, repeating until the deflater reports that it needs input.
/// </summary>
private void WritePendingDeflateOutput(bool isAsync)
{
    while (!this.deflater.NeedsInput())
    {
        int deflateOutput = this.deflater.GetDeflateOutput(this.buffer);
        if (deflateOutput == 0)
        {
            continue;
        }

        if (isAsync)
        {
            // Begin/End pair issued back to back: EndWrite is called right after BeginWrite.
            IAsyncResult asyncResult = this._stream.BeginWrite(this.buffer, 0, deflateOutput, null, null);
            this._stream.EndWrite(asyncResult);
        }
        else
        {
            this._stream.Write(this.buffer, 0, deflateOutput);
        }
    }
}


        
/// <summary>
/// Asks the encoder to write its compressed output into <paramref name="output"/>.
/// </summary>
/// <param name="output">Buffer passed through to the encoder.</param>
/// <returns>Whatever the encoder's GetCompressedOutput call returns.</returns>
public int GetDeflateOutput(byte[] output)
{
    int produced = encoder.GetCompressedOutput(output);
    return produced;
}

Write 调用 InternalWrite,而 InternalWrite 通过 deflater 对象调用 GetDeflateOutput(上面最后一个方法属于 deflater 对应的类,而不是 DeflateStream 本身)。
encoder 是 FastEncoder 对象,在 Deflater 的构造函数中初始化

FastEncoder类

            public int GetCompressedOutput(byte[] outputBuffer)
            
{
                
this.output.UpdateBuffer(outputBuffer);
                
if (this.usingGzip && !this.hasGzipHeader)
                
{
                    
this.output.WriteGzipHeader(3);  
                    
this.hasGzipHeader = true;
                }

                
if (!this.hasBlockHeader)
                
{
                    
this.hasBlockHeader = true;
                    
this.output.WritePreamble();
                }

                
do
                
{
                    
int count = (this.inputBuffer.Count < this.inputWindow.FreeWindowSpace) ? this.inputBuffer.Count : this.inputWindow.FreeWindowSpace;
                    
if (count > 0)
                    
{
                        
this.inputWindow.CopyBytes(this.inputBuffer.Buffer, this.inputBuffer.StartIndex, count);
                        
if (this.usingGzip)
                        
{
                            
this.gzipCrc32 = DecodeHelper.UpdateCrc32(this.gzipCrc32, this.inputBuffer.Buffer, this.inputBuffer.StartIndex, count);
                            
uint num2 = this.inputStreamSize + ((uint) count);
                            
if (num2 < this.inputStreamSize)
                            
{
                                
throw new InvalidDataException(SR.GetString("StreamSizeOverflow"));
                            }

                            
this.inputStreamSize = num2;
                        }

                        
this.inputBuffer.ConsumeBytes(count);
                    }

                    
while ((this.inputWindow.BytesAvailable > 0&& this.output.SafeToWriteTo())
                    
{
                        
this.inputWindow.GetNextSymbolOrMatch(this.currentMatch);
                        
if (this.currentMatch.State == MatchState.HasSymbol)
                        
{
                            
this.output.WriteChar(this.currentMatch.Symbol);
                        }

                        
else
                        
{
                            
if (this.currentMatch.State == MatchState.HasMatch)
                            
{
                                
this.output.WriteMatch(this.currentMatch.Length, this.currentMatch.Position);
                                
continue;
                            }

                            
this.output.WriteChar(this.currentMatch.Symbol);
                            
this.output.WriteMatch(this.currentMatch.Length, this.currentMatch.Position);
                        }

                    }

                }

                
while (this.output.SafeToWriteTo() && !this.NeedsInput());
                
this.needsEOB = true;
                
return this.output.BytesWritten;
            }



注意到
output.WriteGzipHeader(3) 这个调用,汗一下,压缩级别参数被写死成 3 了。。。(从下面 WriteGzipHeader 的实现可以看到,这个参数只决定 gzip 头里的一个标志字节)

看看WriteGzipHeader是怎么写的

/// <summary>
/// Writes a fixed 10-byte gzip member header into the output buffer at the current position.
/// Field names in the comments follow RFC 1952.
/// </summary>
/// <param name="compression_level">
/// Only used to pick the XFL byte: 10 writes 2, any other value writes 4.
/// </param>
internal void WriteGzipHeader(int compression_level)
{
    this.outputBuf[this.outputPos++] = 0x1f; // ID1: gzip magic, first byte
    this.outputBuf[this.outputPos++] = 0x8b; // ID2: gzip magic, second byte
    this.outputBuf[this.outputPos++] = 8;    // CM: compression method 8 = deflate
    this.outputBuf[this.outputPos++] = 0;    // FLG: no flags, so no file name/comment/extra field

    // MTIME: four zero bytes, i.e. no modification time is recorded.
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;
    this.outputBuf[this.outputPos++] = 0;

    // XFL: RFC 1952 defines 2 as "maximum compression" and 4 as "fastest algorithm".
    if (compression_level == 10)
    {
        this.outputBuf[this.outputPos++] = 2;
    }
    else
    {
        this.outputBuf[this.outputPos++] = 4;
    }

    this.outputBuf[this.outputPos++] = 0;    // OS: 0 (FAT filesystem in RFC 1952's table)
}


再次晕倒,没扩展头信息,没时间,全部写死,如果看不明白,对照一下gzip的格式就知道了

gzip格式 http://www.gzip.org/zlib/rfc-gzip.html

至此,已经知道为什么2.0的Gzip压缩,没有文件名信息,也没有时间了,真的是很“阳春”(简陋)的压缩。

还有另外一个问题,为什么2.0自带的库压缩率低?

原文地址:https://www.cnblogs.com/yeye518/p/2769913.html