Skip to content

Instantly share code, notes, and snippets.

@DavidBuchanan314
Created August 19, 2023 16:15
Show Gist options
  • Save DavidBuchanan314/b223bce114ec715a66f17f0b8af3afa3 to your computer and use it in GitHub Desktop.
Save DavidBuchanan314/b223bce114ec715a66f17f0b8af3afa3 to your computer and use it in GitHub Desktop.
import zlib
from io import BytesIO
# Fixed header written at the start of every stream by rledeflate() and
# required verbatim by rleinflate().
# NOTE(review): presumably a DEFLATE dynamic-Huffman block header that makes
# all of the codes below byte-aligned -- confirm against RFC 1951.
# TODO: make this shorter
RLEFLATE_MAGIC = (
    b'\xed\xe3\t\x90$I\x92$I\xcc\xff\xff\xff\xff\xff\xff\xff\xff'
    b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    b'\xff\xff\xff\xff?3333\xe1\x0e\x00\x0e``\xa0\xaa*"\xc2\xdc]U\x04\x00\x80'
)

# Single byte that terminates a stream whose payload length is even.
RLEFLATE_EOF = b'\x77'

# RLEFLATE_REPEAT[n] is the code for "append the previous 4 output bytes
# n more times".  Index 0 is a placeholder so a run length can be used
# directly as the index; a run of 0 is never encoded.
RLEFLATE_REPEAT = [
    None,
    # runs 1-8
    b'W', b'7', b'G', b'g', b'#', b'3', b'+', b';',
    # runs 9-16
    b'\x11', b'Q', b'\x19', b'Y', b'\x15', b'U', b'\x1d', b']',
    # runs 17-24
    b'\n', b'*', b'J', b'j', b'\x0e', b'.', b'N', b'n',
    # runs 25-32 (two-byte codes from here through run 56)
    b'\xf7\x0b', b'\xf7+', b'\xf7K', b'\xf7k',
    b'\xf7\x0f', b'\xf7/', b'\xf7O', b'\xf7o',
    # runs 33-40
    b'\xf7\x04', b'\xf7\x14', b'\xf7$', b'\xf74',
    b'\xf7D', b'\xf7T', b'\xf7d', b'\xf7t',
    # runs 41-48
    b'\xf7\x06', b'\xf7\x16', b'\xf7&', b'\xf76',
    b'\xf7F', b'\xf7V', b'\xf7f', b'\xf7v',
    # runs 49-56
    b'\xf7\x05', b'\xf7\x15', b'\xf7%', b'\xf75',
    b'\xf7E', b'\xf7U', b'\xf7e', b'\xf7u',
    # runs 57-64 (single-byte codes again)
    b'\x04', b'\x14', b'$', b'4', b'D', b'T', b'd', b't',
]

# First byte shared by every two-byte entry of RLEFLATE_REPEAT; the decoder
# reads one more byte after seeing it.
RLEFLATE_REPEAT_PREFIX = b'\xf7'
# The string round-trip is deliberate: the original author reports it as
# faster than reversing the bits with shifts and masks.
def bitrev_u8(n):
    """Return *n* with the bit order of its 8-digit binary form reversed.

    For example ``0b00000001`` becomes ``0b10000000``.
    """
    bits = format(n, "08b")
    return int(bits[::-1], base=2)
def encode_literal_pair(a, b):
    """Encode two payload bytes as a 3-byte literal sequence.

    With ``ra = bitrev_u8(a)`` the result is:

    * byte 0: low nibble of ``ra`` in the high nibble, low nibble ``0xf``
    * byte 1: high nibble ``0xf``, high nibble of ``ra`` in the low nibble
    * byte 2: ``bitrev_u8(b)``
    """
    rev_a = bitrev_u8(a)
    first = ((rev_a & 0x0f) << 4) | 0x0f
    second = (rev_a >> 4) | 0xf0
    third = bitrev_u8(b)
    return bytes((first, second, third))
def rledeflate(data):
    """Compress *data* using literals and 4-byte run-length repeats only.

    The stream is RLEFLATE_MAGIC followed by a sequence of codes:

    * a repeat code (``RLEFLATE_REPEAT[n]``) whenever the next ``n`` 4-byte
      groups (1 <= n <= 64) all equal the 4 bytes just before them;
    * otherwise two literal pairs covering the next 4 bytes.

    Up to 3 trailing bytes are written as literals.  An even-length payload
    ends with RLEFLATE_EOF; for an odd-length payload the final byte and the
    end marker are fused into one 3-byte sequence.

    The result is intended to also be decodable as a raw DEFLATE stream
    (the self-check in this file inflates it with zlib).

    Args:
        data: bytes-like object to compress.

    Returns:
        The compressed stream as ``bytes``.
    """
    # A bytearray grows in place; repeated ``bytes +=`` copies the whole
    # output on every step and makes encoding quadratic.
    out = bytearray(RLEFLATE_MAGIC)
    size = len(data)
    i = 0
    while i + 3 < size:
        runlen = 0
        # A repeat copies the previous 4 bytes, so it needs 4 bytes of history.
        if i >= 4:
            prev = data[i - 4:i]
            pos = i
            # RLEFLATE_REPEAT only has codes for runs up to 64.  Near the end
            # of the input the slice is shorter than 4 bytes and never matches.
            while runlen < 64 and data[pos:pos + 4] == prev:
                runlen += 1
                pos += 4
        if runlen:
            out += RLEFLATE_REPEAT[runlen]
            i += runlen * 4
        else:
            out += encode_literal_pair(data[i], data[i + 1])
            out += encode_literal_pair(data[i + 2], data[i + 3])
            i += 4

    # At most 3 bytes are left at this point.
    remainder = size - i
    if remainder >= 2:
        out += encode_literal_pair(data[i], data[i + 1])
        i += 2
        remainder -= 2

    if remainder:
        # 1 byte left to output, EOF marker will straddle the last byte
        tmp = bitrev_u8(data[i])
        out += bytes([
            0x0f | ((tmp << 4) & 0xf0),
            0x70 | (tmp >> 4),
            0x07,  # EOF
        ])
    else:
        # we'll reach here on all even-length messages
        out += RLEFLATE_EOF
    return bytes(out)
def rleinflate(data):
    """Decode a stream produced by rledeflate().

    Args:
        data: bytes-like object starting with RLEFLATE_MAGIC.

    Returns:
        The decoded payload as ``bytes``.  Anything after the end marker is
        ignored.

    Raises:
        ValueError: if the header is wrong, the stream ends before an end
            marker, a repeat code is unknown, a repeat appears before 4
            bytes of output exist, or the fused end marker is malformed.
    """
    stream = BytesIO(data)

    def read_exact(count):
        # Explicit check instead of ``assert``: asserts vanish under
        # ``python -O`` and must not guard untrusted input.
        chunk = stream.read(count)
        if len(chunk) != count:
            raise ValueError("truncated RLEFLATE stream")
        return chunk

    if stream.read(len(RLEFLATE_MAGIC)) != RLEFLATE_MAGIC:
        raise ValueError("bad RLEFLATE magic header")

    # bytearray avoids re-copying the whole output on every append.
    out = bytearray()
    while True:
        code = read_exact(1)
        if code == RLEFLATE_EOF:
            return bytes(out)

        if code[0] & 0x0f == 0x0f:
            # Literal sequence: 3 bytes carrying two payload bytes.  The
            # first payload byte is split across the first two bytes.
            next0, next1 = read_exact(2)
            out.append(bitrev_u8((code[0] >> 4) | ((next0 << 4) & 0xf0)))
            if next0 & 0xf0 == 0x70:
                # Odd-length ending: the end marker is fused into this
                # sequence, so there is no second payload byte.
                if next1 != 0x07:
                    raise ValueError("malformed RLEFLATE end marker")
                return bytes(out)
            out.append(bitrev_u8(next1))
            continue

        # Anything else is a repeat code; some are two bytes long.
        if code == RLEFLATE_REPEAT_PREFIX:
            code += read_exact(1)
        try:
            repcount = RLEFLATE_REPEAT.index(code)
        except ValueError:
            raise ValueError(
                f"unknown RLEFLATE repeat code {code!r}"
            ) from None
        if len(out) < 4:
            raise ValueError("RLEFLATE repeat before 4 bytes of output")
        out += out[-4:] * repcount
def zlib_inflate(data):
    """Inflate a raw DEFLATE stream (no zlib/gzip wrapper) with zlib.

    Args:
        data: the complete raw DEFLATE stream.

    Returns:
        The decompressed bytes.
    """
    # wbits=-15 selects raw DEFLATE with the maximum window size.
    d = zlib.decompressobj(wbits=-15)
    # Decompress.flush() takes an optional output-buffer length, not a flush
    # mode, so no zlib.Z_* constant belongs here.
    return d.decompress(data) + d.flush()
# Self-check, executed at module level: compress a sample message, then
# confirm that both zlib and the dedicated decoder reproduce it exactly.
msg = b"Helloooooooooooooooooooooooooooooo, world!" + b"!" * 1000

compressed = rledeflate(msg)
print(compressed)
print("ratio:", len(compressed)/len(msg))

# A stock raw-DEFLATE decoder must recover the message...
zlib_reinflated = zlib_inflate(compressed)
print(zlib_reinflated)
assert msg == zlib_reinflated

# ...and so must rleinflate().
rleinflated = rleinflate(compressed)
print(rleinflated)
assert msg == rleinflated
assert rleinflated == zlib_reinflated
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment