lzf
Table of Contents
http://oldhome.schmorp.de/marc/liblzf.html
1. Overview
这个压缩库非常轻量
- lzf.c 程序入口文件
- lzf.h 接口文件
- lzfP.h 配置文件
- lzf_c.c 压缩
- lzf_d.c 解压缩
其实主要的就是两个文件lzf_c.c和lzf_d.c
2. Compress
/* * Compress in_len bytes stored at the memory block starting at * in_data and write the result to out_data, up to a maximum length * of out_len bytes. * * If the output buffer is not large enough or any error occurs return 0, * otherwise return the number of bytes used, which might be considerably * more than in_len (but less than 104% of the original size), so it * makes sense to always use out_len == in_len - 1), to ensure _some_ * compression, and store the data uncompressed otherwise (with a flag, of * course. * * lzf_compress might use different algorithms on different systems and * even different runs, thus might result in different compressed strings * depending on the phase of the moon or similar factors. However, all * these strings are architecture-independent and will result in the * original data when decompressed using lzf_decompress. * * The buffers must not be overlapping. * */ unsigned int lzf_compress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len);
- in & out的内存区间不能重叠
- 如果out_len不够的话,返回0;否则返回压缩后大小。所以使用上可以out_len = in_len-1. 如果压缩之后空间变大的话那么直接使用原空间
- 不同版本lzf压缩同一个数据得到的结果不一定相同,取决于寻找repeatable string方法。但是均可以使用同样的解压缩函数解压。
压缩数据节(data section)有三种标识 a. literal b. short backref c. long backref.
/* * compressed format * * 000LLLLL <L+1> ; literal, L+1=1..33 octets * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset * */
配置文件中最重要的几个参数有:
- HLOG # 用于查找repeatable string的hashtable大小. 1 << HLOG
- VERY_FAST / ULTRA_FAST # 控制查找repeatble string策略
- LZF_USE_OFFSETS # 决定hashtable存储偏移还是存储pointer. 为了方便阅读代码,这里我们假设存储偏移。
- define LZF_HSLOT_BIAS ((const u8 *)in_data)
- typedef unsigned int LZF_HSLOT;
- typedef LZF_HSLOT LZF_STATE[1 << (HLOG)]; # hashtable定义
- STRICT_ALIGN # input数据是否对齐
压缩函数其实不长,所以这里我把代码稍作整理全部贴出来,然后附上相关注释
#ifndef FRST # define FRST(p) (((p[0]) << 8) | p[1]) # define NEXT(v,p) (((v) << 8) | p[2]) // 区别在于使用hash函数不同. 普通模式下运算量更多但是均匀效果应该会更好 # if ULTRA_FAST # define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) # elif VERY_FAST # define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) # else # define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) # endif #endif // literal最大长度 #define MAX_LIT (1 << 5) // offset最大长度 #define MAX_OFF (1 << 13) // ref最大长度. 看long backref定义是L+8 ocets. 而L最长可以是8bits. #define MAX_REF ((1 << 8) + (1 << 3)) unsigned int lzf_compress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len ) { LZF_STATE htab; const u8 *ip = (const u8 *)in_data; u8 *op = (u8 *)out_data; const u8 *in_end = ip + in_len; u8 *out_end = op + out_len; const u8 *ref; unsigned long off; unsigned int hval; int lit; if (!in_len || !out_len) return 0; memset (htab, 0, sizeof (htab)); // 初始化hashtable. lit = 0; op++; /* start run */ // 这里空出1字节是为了处理literal. hval = FRST (ip); while (ip < in_end - 2) { LZ_HSLOT *hslot; hval = NEXT (hval, ip); // 此时hval = (ip[-1] << 24) | (ip[0] << 16) | (ip[1] << 8) | ip[2]. hslot = htab + IDX (hval); // 然后查找hashtable是否存在潜在相同的串,记为ref; 同时更新hashtable这个entry为ip. // 这里更新hashtable entry非常重要,因为offset是有限制的。如果不更新的话,那么超过offset长度限制的串 // 便不能被匹配以及压缩了。 ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS; if (1 && ref < ip /* the next test will actually take care of this, but this is faster */ // 这里真实偏移是(off+1). 但是只存储off.(see backref) && (off = ip - ref - 1) < MAX_OFF && ref > (u8 *)in_data // 检查ref和ip头三个字节是否相同. 至少3个字节才会压缩 && ref[2] == ip[2] #if STRICT_ALIGN && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0]) #else && *(u16 *)ref == *(u16 *)ip #endif ) { /* match found at *ref++ */ unsigned int len = 2; unsigned int maxlen = in_end - ip - len; // 最长可以ref多少字节 maxlen = maxlen > MAX_REF ? MAX_REF : maxlen; // 保守估计至少3个字节(long backref). 这里+1为下一轮查找literal准备 if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */ return 0; // 将之前的literal flush出来。这个后面会给出解释为什么可以这么做 op [- lit - 1] = lit - 1; /* stop run */ op -= !lit; /* undo run if length is zero */ for (;;) { if (expect_true (maxlen > 16)) { len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; len++; if (ref [len] != ip [len]) break; } do len++; while (len < maxlen && ref[len] == ip[len]); break; } // ip和ref公共串长度为len - 1.(比较tricky, 需要考虑一下) // 注意这里如果ip和ref存在overlapping也没有任何问题 len -= 2; /* len is now #octets - 1 */ ip++; if (len < 7) // short backref { *op++ = (off >> 8) + (len << 5); } else // long backref. { *op++ = (off >> 8) + ( 7 << 5); *op++ = len - 7; } *op++ = off; // 至此一轮repeatable string查找完毕。为下一轮literal准备. lit = 0; op++; /* start run */ // 输入串向前前进len+1字节 ip += len + 1; if (expect_false (ip >= in_end - 2)) break; // 如果是ULTRA_FAST回退一个字节做索引 // 如果是VERY FAST回退两个字节 // 普通模式的话会对这一个输入串做索引 #if ULTRA_FAST || VERY_FAST --ip; # if VERY_FAST && !ULTRA_FAST --ip; # endif hval = FRST (ip); hval = NEXT (hval, ip); htab[IDX (hval)] = ip - LZF_HSLOT_BIAS; ip++; # if VERY_FAST && !ULTRA_FAST hval = NEXT (hval, ip); htab[IDX (hval)] = ip - LZF_HSLOT_BIAS; ip++; # endif #else ip -= len + 1; do { hval = NEXT (hval, ip); htab[IDX (hval)] = ip - LZF_HSLOT_BIAS; ip++; } while (len--); #endif } else // 如果没有找到公共串的话那么输出literal. { /* one more literal byte we must copy */ if (expect_false (op >= out_end)) return 0; lit++; *op++ = *ip++; // 后面会讲解literal是怎么处理的 if (expect_false (lit == MAX_LIT)) { op [- lit - 1] = lit - 1; /* stop run */ lit = 0; op++; /* start run */ } } } if (op + 3 > out_end) /* at most 3 bytes can be missing here */ return 0; // 如果剩余串很短的话那么通用按照literal来处理。 while (ip < in_end) { lit++; *op++ = *ip++; if (expect_false (lit == MAX_LIT)) { op [- lit - 1] = lit - 1; /* stop run */ lit = 0; op++; /* start run */ } } op [- lit - 1] = lit - 1; /* end run */ op -= !lit; /* undo run if length is zero */ return op - (u8 *)out_data; }
literal处理比较有趣,大致方式如下
- "lit = 0; op++". # 因为literal需要一个额外字节,这里op++空出一个字节
- 可是有时候lit=0就退出了,这个时候op最后一个字节是废弃的,所以有"op -= !lit".
- 当lit == MAX_LIT的时候或者是flush时候(假设lit!=0. lit=0的情况上面分析过了). 比如lit = 3
- "op[-lit-1] = lit-1" 就是 "op[-4] = 2"
- op[-4]是literal开头的字节,而2+1则是literal长度
3. Decompress
相对于压缩函数,解压缩函数就没有那么多策略,完全是数据驱动。同样我把代码稍作整理添加少注释
// intel有rep movsb指令用来做memcpy. 之前做过实验发现效果并不理想 #if USE_REP_MOVSB /* small win on amd, big loss on intel */ #if (__i386 || __amd64) && __GNUC__ >= 3 # define lzf_movsb(dst, src, len) \ asm ("rep movsb" \ : "=D" (dst), "=S" (src), "=c" (len) \ : "0" (dst), "1" (src), "2" (len)); #endif #endif unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len) { u8 const *ip = (const u8 *)in_data; u8 *op = (u8 *)out_data; u8 const *const in_end = ip + in_len; u8 *const out_end = op + out_len; do { unsigned int ctrl = *ip++; if (ctrl < (1 << 5)) /* literal run */ { ctrl++; if (op + ctrl > out_end) { SET_ERRNO (E2BIG); return 0; } #ifdef lzf_movsb lzf_movsb (op, ip, ctrl); #else switch (ctrl) { case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++; case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++; case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++; case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++; case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++; case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++; case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++; case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++; } #endif } else /* back reference */ { unsigned int len = ctrl >> 5; u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; if (len == 7) { len += *ip++; } ref -= *ip++; if (op + len + 2 > out_end) { SET_ERRNO (E2BIG); return 0; } if (ref < (u8 *)out_data) { SET_ERRNO (EINVAL); return 0; } #ifdef lzf_movsb len += 2; lzf_movsb (op, ref, len); #else switch (len) { default: len += 2; // 处理ip和ref公共串存在overlapping的情况 if (op >= ref + len) { /* disjunct areas */ memcpy (op, ref, len); op += len; } else { /* overlapping, use octte by octte copying */ do *op++ = *ref++; while (--len); } break; case 9: *op++ = *ref++; case 8: *op++ = *ref++; case 7: *op++ = *ref++; case 6: *op++ = *ref++; case 5: *op++ = *ref++; case 4: *op++ = *ref++; case 3: *op++ = *ref++; case 2: *op++ = *ref++; case 1: *op++ = *ref++; case 0: *op++ = *ref++; /* two octets more */ *op++ = *ref++; } #endif } } while (ip < in_end); return op - (u8 *)out_data; }