# Python gzip input stream implementation
import zlib
# https://rationalpie.wordpress.com/2010/06/02/python-streaming-gzip-decompression/
class GzipInputStream(object):
    """Streaming gzip decompressor wrapped around a binary file-like object.

    Compressed bytes are pulled from the wrapped file in blocks and inflated
    incrementally with ``zlib``, so the full payload is never held in memory
    at once. Iterating over the stream yields the decompressed data one
    newline-terminated line at a time.

    NOTE: a single decompressor is used for the whole file, so only the
    first gzip member is decoded; per the zlib documentation, bytes past the
    end of the compressed stream accumulate in ``self.d.unused_data``.
    """

    def __init__(self, f):
        """Wrap *f*, a file-like object whose ``read(n)`` returns gzip bytes."""
        self.f = f
        # wbits = 16 + MAX_WBITS makes zlib expect a gzip header and trailer
        # (rather than a bare zlib stream) with the largest window size.
        self.d = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def read(self, block_size=64 * 1024):
        """Return the next non-empty chunk of decompressed bytes.

        Args:
            block_size: Number of *compressed* bytes to request from the
                underlying file per read; the decompressed chunk may be
                larger or smaller than this.

        Returns:
            A non-empty bytes object, or ``None`` once the underlying file
            has no more data.
        """
        while True:
            raw = self.f.read(block_size)
            if not raw:
                return None
            data = self.d.decompress(raw)
            # A block may inflate to nothing (e.g. it only covered gzip
            # header or trailer bytes). Keep reading instead of handing back
            # an empty chunk that callers could mistake for end of stream.
            if data:
                return data

    # Line splitting adapted from
    # https://github.com/piskvorky/smart_open/blob/master/smart_open/smart_open_lib.py
    def __iter__(self):
        """Yield decompressed lines as bytes.

        Every line keeps its trailing newline byte; a final line that is not
        newline-terminated is yielded as-is. Nothing is yielded for an empty
        stream.
        """
        pending = b''
        while True:
            chunk = self.read(64 * 1024)
            if chunk is None:
                break
            pending += chunk
            # Everything before the last newline is a complete line; the
            # remainder (possibly empty) waits for the next chunk.
            lines = pending.split(b'\n')
            pending = lines.pop()
            for line in lines:
                yield line + b'\n'
        if pending:
            yield pending