zlib源码分析—compress函数
本篇博客的目的是分析compress.c中的函数,主要目的是学习deflate函数的使用。先从compress函数说起,compress函数其实就是以压缩级别Z_DEFAULT_COMPRESSION调用函数compress2。从zlib.h中第190行的宏定义可以看出,zlib为压缩级别定义了以下几个具名常量:
Z_NO_COMPRESSION(不压缩模式)、Z_BEST_SPEED(最快速度压缩模式)、Z_BEST_COMPRESSION(最高压缩率压缩模式)、
Z_DEFAULT_COMPRESSION(默认压缩模式)。除这些具名常量外,0到9之间的任意整数都是合法的压缩级别。
/*
 * Compress sourceLen bytes from source into dest using the default
 * compression level. All other arguments are forwarded unchanged to
 * compress2(), whose return value is passed back to the caller.
 */
int ZEXPORT compress (Bytef *dest, uLongf *destLen,const Bytef *source, uLong sourceLen)
{
    const int default_level = Z_DEFAULT_COMPRESSION;

    return compress2(dest, destLen, source, sourceLen, default_level);
}
// From zlib.h (around line 190): named compression levels accepted as the
// `level` argument of compress2 / deflateInit.
#define Z_NO_COMPRESSION        0 /* store only, no compression */
#define Z_BEST_SPEED            1 /* fastest compression */
#define Z_BEST_COMPRESSION      9 /* highest compression ratio */
#define Z_DEFAULT_COMPRESSION  (-1) /* default; deflateInit2_ maps it to 6 unless FASTEST is defined */
/* compression levels */
compress2函数将source缓冲区中的数据压缩存储到dest缓冲区,sourceLen是源缓冲区的长度(以字节为单位)。destLen是一个指针(调用时需要传入变量的地址):调用前,它指向的值是目的缓冲区的大小,该大小需要比sourceLen大至少0.1%再加上12字节,以应对压缩最坏情况的发生;函数返回时,它指向的值被更新为压缩后数据实际占用的字节数。level参数和deflateInit函数的level参数相同,用于表示压缩级别。
如果函数调用成功,返回Z_OK;如果没有足够内存,返回Z_MEM_ERROR;如果输出缓冲区中没有足够空间,返回Z_BUF_ERROR;如果level参数非法,返回Z_STREAM_ERROR。
/*
 * Compress sourceLen bytes from source into dest at the given level.
 *
 * On entry *destLen is the capacity of dest in bytes. It is set to 0 up
 * front and, once the deflate loop has finished, to stream.total_out.
 *
 * Returns the deflateInit() error if initialisation fails. Otherwise returns
 * Z_OK when deflate() ended with Z_STREAM_END, or the last deflate() result.
 */
int ZEXPORT compress2 (Bytef *dest, uLongf *destLen, const Bytef *source,
                       uLong sourceLen, int level)
{
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;  /* largest chunk handed to deflate() at once */
    uLong left;

    left = *destLen;            /* remember the caller-supplied output capacity */
    *destLen = 0;               /* will report the compressed size on return */

    /* Zero zalloc/zfree: deflateInit installs its default allocator functions. */
    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;  /* private data passed to zalloc/zfree */

    err = deflateInit(&stream, level);
    if (err != Z_OK) return err;

    /* Start with empty windows; the loop below hands out the real chunks. */
    stream.next_out = dest;
    stream.avail_out = 0;
    stream.next_in = (z_const Bytef *)source;
    stream.avail_in = 0;

    do {
        if (stream.avail_out == 0) {
            /* Previous output chunk is used up: grant the next one, clamped to max. */
            stream.avail_out = left > (uLong)max ? max : (uInt)left;
            left -= stream.avail_out;
        }
        if (stream.avail_in == 0) {
            /* Previous input chunk is consumed: feed the next one, clamped to max. */
            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
            sourceLen -= stream.avail_in;
        }
        /* Ask deflate to finish only once no input remains outside the stream. */
        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
    } while (err == Z_OK);

    *destLen = stream.total_out;
    deflateEnd(&stream);
    return err == Z_STREAM_END ? Z_OK : err;
}
以一个不恰当的例子来形容这段程序的逻辑:next_in就像一罐奶粉,next_out就像马桶。stream.avail_in==0这段逻辑用来判断奶瓶是否为空,整个do…while循环形象地说就像在喂一只四脚吞金兽(泛指幼儿):每次迭代(喂食)不能一下子就把一整罐奶粉喂给小朋友,而是一瓶一瓶地泡着喂。avail_out同理,它指示马桶里还剩多少空间。当deflate宝宝吃完(进行完一次迭代),并且吃完了一瓶(stream.avail_in==0),就要再冲一瓶奶粉(stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen),奶粉罐里面会相应地少一些奶粉(sourceLen -= stream.avail_in)。stream.avail_out的处理方式与此相同,就不再举例说明了。这里的sourceLen > (uLong)max ? max : (uInt)sourceLen用来防止一次冲的奶粉太多(单次交给deflate的数据量不能超过uInt所能表示的最大值)。而stream中的total_in和total_out则记录了宝宝总共吃了多少奶粉以及拉了多少臭臭。
下面分析一下deflateInit函数。deflateInit其实是一个宏定义,实际上就是带有ZLIB_VERSION、sizeof(z_stream)实参的deflateInit_函数调用。
从下面的代码可以看出,deflateInit_函数调用了deflateInit2_函数,传入参数Z_DEFLATED(deflate压缩方法,目前只支持这种方式)、MAX_WBITS(#define MAX_WBITS 15)、DEF_MEM_LEVEL(#define DEF_MEM_LEVEL 8)、
Z_DEFAULT_STRATEGY(#define Z_DEFAULT_STRATEGY 0,压缩策略)。
/*
 * Initialise strm for compression at the given level, filling in the
 * remaining deflateInit2_() arguments with the library defaults:
 * Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL and Z_DEFAULT_STRATEGY.
 * version and stream_size are forwarded unchanged; the result of
 * deflateInit2_() is returned as is.
 */
int ZEXPORT deflateInit_(z_streamp strm,int level,const char*version,int stream_size)
{
    const int method = Z_DEFLATED;
    const int window_bits = MAX_WBITS;
    const int mem_level = DEF_MEM_LEVEL;
    const int strategy = Z_DEFAULT_STRATEGY;

    /* To do: ignore strm->next_in if we use it as window */
    return deflateInit2_(strm, level, method, window_bits, mem_level,
                         strategy, version, stream_size);
}
/* Compression method constant; deflateInit2_ rejects any other method value. */
#define Z_DEFLATED  8
/* The deflate compression method (the only one supported in this version) */
/* Values for the strategy argument of deflateInit2_, which rejects
 * strategy < 0 or strategy > Z_FIXED. */
#define Z_FILTERED            1
#define Z_HUFFMAN_ONLY        2
#define Z_RLE                3
#define Z_FIXED              4
#define Z_DEFAULT_STRATEGY    0
MAX_WBITS和LZ77窗口大小有关,比如LZ77窗口大小配为32K($2^{15}$),则MAX_WBITS为15。
/*
 * Prepare strm for compression: check the caller's library version and
 * z_stream size, install the default allocator functions when none were
 * supplied, normalise and validate the parameters, allocate the
 * deflate_state and its buffers, then finish via deflateReset().
 *
 * windowBits < 0 selects wrap = 0 (no zlib wrapper) and is negated; with
 * GZIP defined, windowBits > 15 selects wrap = 2 (gzip wrapper) and has 16
 * subtracted.
 *
 * Returns Z_VERSION_ERROR on a version or structure-size mismatch,
 * Z_STREAM_ERROR for a null strm or an out-of-range parameter,
 * Z_MEM_ERROR when an allocation fails, otherwise the result of
 * deflateReset(strm).
 */
int ZEXPORT deflateInit2_(z_streamp strm,int level,int method,int windowBits,int memLevel,int strategy,const char*version,int stream_size) {
    deflate_state *s;
    int wrap = 1;
    static const char my_version[] = ZLIB_VERSION;

    ushf *overlay;
    /* We overlay pending_buf and d_buf+l_buf. This works since the average
     * output size for (length,distance) codes is <= 24 bits.
     */

    if (version == Z_NULL || version[0] != my_version[0] ||
        stream_size != sizeof(z_stream)) {
        return Z_VERSION_ERROR;
    }
    if (strm == Z_NULL) return Z_STREAM_ERROR;

    strm->msg = Z_NULL;
    /* No custom allocator supplied: fall back to the default functions. */
    if (strm->zalloc == (alloc_func)0) {
        strm->zalloc = zcalloc;
        strm->opaque = (voidpf)0;
    }
    if (strm->zfree == (free_func)0)
        strm->zfree = zcfree;

#ifdef FASTEST
    if (level != 0) level = 1;
#else
    if (level == Z_DEFAULT_COMPRESSION) level = 6;
#endif

    if (windowBits < 0) { /* suppress zlib wrapper */
        wrap = 0;
        windowBits = -windowBits;
    }
#ifdef GZIP
    else if (windowBits > 15) {
        wrap = 2;       /* write gzip wrapper instead */
        windowBits -= 16;
    }
#endif
    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
        return Z_STREAM_ERROR;
    }
    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */

    /* Allocate the state exactly once; the pointer is the only handle to it. */
    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
    if (s == Z_NULL) return Z_MEM_ERROR;
    strm->state = (struct internal_state FAR *)s;
    s->strm = strm;
    s->status = INIT_STATE;     /* to pass state test in deflateReset() */

    s->wrap = wrap;
    s->gzhead = Z_NULL;
    s->w_bits = (uInt)windowBits;
    s->w_size = 1 << s->w_bits;
    s->w_mask = s->w_size - 1;

    s->hash_bits = (uInt)memLevel + 7;
    s->hash_size = 1 << s->hash_bits;
    s->hash_mask = s->hash_size - 1;
    s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);

    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));

    s->high_water = 0;      /* nothing written to s->window yet */

    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */

    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
    s->pending_buf = (uchf *) overlay;
    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);

    /* Any buffer allocation failure: let deflateEnd() clean up the stream. */
    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
        s->pending_buf == Z_NULL) {
        s->status = FINISH_STATE;
        strm->msg = ERR_MSG(Z_MEM_ERROR);
        deflateEnd (strm);
        return Z_MEM_ERROR;
    }
    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;

    s->level = level;
    s->strategy = strategy;
    s->method = (Byte)method;

    return deflateReset(strm);
}
下面先看看对压缩级别level的处理,分两种情况:
定义了FASTEST
| deflateInit/deflateInit_/deflateInit2_函数输入level | deflate_state结构体中的level |
| --- | --- |
| Z_NO_COMPRESSION(0) | 0 |
| Z_BEST_SPEED(1) | 1 |
| 2 | 1 |
| 3 | 1 |
| 4 | 1 |
| 5 | 1 |
| 6 | 1 |
| 7 | 1 |
| 8 | 1 |
| Z_BEST_COMPRESSION(9) | 1 |
| Z_DEFAULT_COMPRESSION(-1) | 1 |
未定义FASTEST
| deflateInit/deflateInit_/deflateInit2_函数输入level | deflate_state结构体中的level |
| --- | --- |
| Z_NO_COMPRESSION(0) | 0 |
| Z_BEST_SPEED(1) | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| Z_BEST_COMPRESSION(9) | 9 |
| Z_DEFAULT_COMPRESSION(-1) | 6 |
现在看看对windowBits和wrap的处理:未定义GZIP时,windowBits处于闭区间[8, 15]时,wrap等于1;windowBits处于闭区间[-15, -8]时,wrap等于0,windowBits变为相反数,处于闭区间[8, 15]。但是要剔除windowBits=8且wrap=0的情况,也就是说输入的windowBits为负数时只能处于[-15, -9],其相反数处于[9, 15]。
/* Excerpt from deflateInit2_: a negative windowBits selects wrap = 0 and is negated. */
if(windowBits <0){/* suppress zlib wrapper */
wrap =0;
windowBits =-windowBits;
}
/* "..." stands for the other validation conditions omitted from this excerpt;
 * windowBits == 8 is rejected unless wrap == 1. */
if(...||(windowBits ==8&& wrap !=1)){
return Z_STREAM_ERROR;
}
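当定义了GZIP时(gzip模式),zlib wrapper处理逻辑相同。但是当windowBits大于15时,wrap=2,windowBits需要减去16。这里注意windowBits不能等于24:减去16之后windowBits为8而wrap为2,条件(windowBits == 8 && wrap != 1)成立,函数会返回Z_STREAM_ERROR。
| 输入windowBits | 处理后的windowBits | wrap | 模式 |
| --- | --- | --- | --- |
| [8, 15] | [8, 15] | 1 | ZLIB |
| [-15, -9] | [9, 15] | 0 | 无zlib封装(raw deflate) |
| x > 15且x != 24,即[25, 31] | x - 16 | 2 | GZIP |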
注意:由于256字节的滑动窗的实现有bug,所以windowBits等于8时需要修改为9。
比如LZ77窗口大小是32K($2^{15}$),w_bits则是15,w_size就是1<<15,w_mask就是二进制111 1111 1111 1111。从下面代码中可以看出LZ77窗口申请的大小实际是64KB。
/* Excerpt from deflateInit2_: window geometry derived from windowBits. */
s->w_bits =(uInt)windowBits;
s->w_size =1<< s->w_bits;/* 2^w_bits */
s->w_mask = s->w_size -1;/* the low w_bits bits set */
s->window =(Bytef *)ZALLOC(strm, s->w_size,2*sizeof(Byte));// note: each of the w_size items is 2*sizeof(Byte), so twice w_size bytes are requested (assuming ZALLOC takes (items, size) — confirm)
memLevel和LZ77的hash链相关(hash_bits由memLevel + 7得到),后面再详细讲解,这里只需要知道memLevel的大小在区间[1, MAX_MEM_LEVEL]中(通常即[1, 9])。

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。