Python Gzip Input Stream 实现

import zlib

# https://rationalpie.wordpress.com/2010/06/02/python-streaming-gzip-decompression/
class GzipInputStream(object):
    """Forward-only decompressing reader over a gzip-compressed byte stream.

    Wraps any object exposing ``read(size)`` that returns raw gzip bytes and
    offers the decompressed content either chunk by chunk through
    :meth:`read` or line by line through iteration.

    NOTE(review): a single decompressor is used for the whole stream, so
    bytes following the first gzip member appear not to be decoded --
    confirm whether multi-member archives need to be supported.
    """

    def __init__(self, f):
        """Wrap *f*, a binary file-like object that yields gzip data."""
        self.f = f
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header and
        # trailer instead of a bare or zlib-wrapped deflate stream.
        self.d = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def read(self, block_size=64 * 1024):
        """Return the next non-empty chunk of decompressed bytes.

        Args:
            block_size: number of *compressed* bytes requested from the
                underlying file per read; the decompressed chunk returned
                may be larger or smaller than this.

        Returns:
            A non-empty bytes object, or ``None`` once the underlying file
            has no more data.

        Raises:
            zlib.error: if the input is not valid gzip data.
        """
        while True:
            raw = self.f.read(block_size)
            if not raw:
                return None
            data = self.d.decompress(raw)
            # A compressed block can expand to nothing (for instance when it
            # holds only header or trailer bytes).  Keep reading so callers
            # never receive an empty chunk that looks like end of stream.
            if data:
                return data

    # https://github.com/piskvorky/smart_open/blob/master/smart_open/smart_open_lib.py
    def __iter__(self):
        """Yield decompressed lines, each keeping its trailing ``b'\\n'``.

        The final line is yielded without a newline if the stream does not
        end with one.
        """
        pending = b''
        while True:
            chunk = self.read(64 * 1024)
            if chunk is None:
                break
            pieces = (pending + chunk).split(b'\n')
            # The last piece has no terminating newline yet; carry it over
            # until more data (or end of stream) completes it.
            pending = pieces.pop()
            for piece in pieces:
                yield piece + b'\n'
        if pending:
            yield pending