shithub: mc

Download patch

ref: 05764878243785d7bae1dda067d86eb9bcaec443
parent: e4e8fa6f7760221f843a924f058d11b58041db64
author: Quentin Carbonneaux <[email protected]>
date: Thu May 24 16:20:21 EDT 2018

libflate (DEFLATE decoding)

Hi,

As an official summer of cod entry, here is a start for
the libflate library, which will eventually contain
Myrddin's [de]compression equipment.

It currently exposes a single function

	const decode : (outf : bio.file#, inf : bio.file# -> std.result(void, err))

that decodes a DEFLATE stream in the inf file into the
outf file. The function guarantees that it will not read
more than necessary from inf to either, figure out that
the stream is bogus, or fully uncompress the stream.

I did my due diligence with respect to testing but I am
not fully confident about the correctness of the
implementation; feel free to add more tests, or test it
on example files.

I wish:
  - it was fast (don't expect gunzip's speed)
  - it was better tested
  - it had stricter format checks (e.g., checking for
    invalid Huffman codes)
  - I had implemented zlib/gzip decompression

On the other hand, the current implementation is quite
simple and appears to work.

Cheers,

-- q

--- a/lib/bld.sub
+++ b/lib/bld.sub
@@ -4,6 +4,7 @@
 	date
 	escfmt
 	fileutil
+	flate
 	http
 	inifile
 	iter
--- /dev/null
+++ b/lib/flate/bld.sub
@@ -1,0 +1,13 @@
+lib flate =
+	flate.myr
+	types.myr
+
+	lib ../bio:bio
+	lib ../std:std
+;;
+
+testdeps =
+	../testr:testr
+	../bio:bio
+	../std:std
+;;
--- /dev/null
+++ b/lib/flate/flate.myr
@@ -1,0 +1,396 @@
+use std
+use bio
+use "types"
+
+/* DEFLATE  https://www.ietf.org/rfc/rfc1951.txt
+
+   Terminology:
+     - A 'Huffman code' is one mechanism to map codes to symbols.
+     - A 'code' is the sequence of bits used to encode a symbol
+       in a Huffman code.  Codes are what appears in the DEFLATE
+       stream.
+     - A 'symbol' is what a code decodes to.
+     - A 'length' is the length of an LZ77 reference (the number
+       of bytes to repeat).
+     - A 'distance' is the distance at which the bits referred to
+       by an LZ77 reference are from the current point.
+*/
+
+pkg flate =
+	/* state maintained during decompression */
+	type flatedec = struct
+		rdf : bio.file#
+		wrf : bio.file#
+
+		/* one-byte buffer */
+		bval : int /* in [0, 255] current input byte */
+		bpos : int /* in [0, 8] */
+
+		/* ring buffer window for the bytes output */
+		wpos : std.size
+		wlen : std.size
+		wbuf : byte[WinSz]
+
+		/* Huffman codes */
+		lenhc : hufc /* for lengths */
+		dsthc : hufc /* for distances */
+	;;
+
+	/* one Huffman code */
+	type hufc = struct
+		/* # of codes of length [1, MaxBits] */
+		count : int[MaxBits + 1]
+		/* symbols sorted lexicographically by
+		   <code length, symbol value>; c.f., mkhufc()
+		   and readcode() for usage */
+		symbol : int[MaxSyms]
+	;;
+
+	/* decode a DEFLATE stream */
+	const decode : (outf : bio.file#, inf : bio.file# -> std.result(void, err))
+
+	pkglocal const MaxSyms : std.size
+	pkglocal const MaxBits : std.size
+	pkglocal const WinSz : std.size
+	pkglocal const bits : (st : flatedec#, n : int -> std.result(int, err))
+	pkglocal const nocomp : (st : flatedec# -> std.result(void, err))
+	pkglocal const mkhufc : (lengths : int[:], hc : hufc# -> void)
+	pkglocal const readcode : (st : flatedec#, hc : hufc# -> std.result(int, err))
+	pkglocal const outb : (st : flatedec#, b : byte -> std.result(void, err))
+;;
+
+const MaxLenSyms : std.size = 288 /* max # of length symbols */
+const MaxDstSyms : std.size = 32 /* max # of distance symbols */
+const MaxSyms = MaxLenSyms /* max # of symbols in a Huffman code */
+const MaxBits = 15 /* max length of a code (in bits) */
+
+const WinSz = 32 * std.KiB /* max distance for a reference */
+
+var fixedlenhc : hufc
+var fixeddsthc : hufc
+
+const __init__ = {
+	/* the fixed Huffman codes; c.f., RFC Section 3.2.6 */
+	var length : int[MaxSyms]
+
+	std.slfill(length[0:144], 8)
+	std.slfill(length[144:256], 9)
+	std.slfill(length[256:280], 7)
+	std.slfill(length[280:288], 8)
+	mkhufc(length[:], &fixedlenhc)
+
+	std.slfill(length[0:32], 5)
+	mkhufc(length[0:32], &fixeddsthc)
+}
+
+const outb = {st, b
+	/* outputs a single byte, to the output file; additionally
+	   record the byte emitted in the output window */
+	var pos
+
+	match bio.putb(st.wrf, b)
+	| `std.Ok 0: std.die("no bytes written by bio.putb()")
+	| `std.Ok _:
+	| `std.Err e: -> `std.Err (`Io e)
+	;;
+	pos = st.wpos
+	if pos == WinSz
+		std.assert(st.wlen >= WinSz, "window should be full")
+		pos = 0
+	;;
+	st.wbuf[pos] = b
+	st.wpos = pos + 1
+	st.wlen++ /* don't care about it being > WinSz */
+	-> `std.Ok void
+}
+
+const bits = {st, n
+	var pos, len, bits, bval
+
+	std.assert(n >= 0 && n <= 31, "invalid argument")
+	pos = st.bpos
+	bits = st.bval >> pos
+	for len = 8 - pos; len < n; len += 8
+		match bio.getb(st.rdf)
+		| `std.Ok b:
+			bval = (b : int)
+			st.bval = bval
+			bits += bval << len
+		| `std.Err e: -> `std.Err (`Io e)
+		;;
+	;;
+	st.bpos = 8 - (len - n)
+	-> `std.Ok (bits & ((1 << n) - 1))
+}
+
+const nocomp = {st
+	/* process a non-compressed block; c.f., RFC Section 3.2.4 */
+	var len
+
+	st.bpos = 8 /* skip to a byte boundary */
+	match bits(st, 16)
+	| `std.Ok n: len = (n : std.size)
+	| `std.Err e: -> `std.Err e
+	;;
+	match bits(st, 16)
+	| `std.Ok nlen:
+		if len != (nlen : std.size) ^ 0xffff
+			-> `std.Err (`Format \
+				"non-compressed block length could " \
+				"not be verified")
+		;;
+	| `std.Err e: -> `std.Err e
+	;;
+	for var n = 0; n < len; n++
+		match bio.getb(st.rdf)
+		| `std.Ok b:
+			match outb(st, b)
+			| `std.Ok _:
+			| err: -> err
+			;;
+		| `std.Err e: -> `std.Err (`Io e)
+		;;
+	;;
+	-> `std.Ok void
+
+}
+
+const mkhufc = {length, hc
+	/* note: it is possible to have 0s in the length array; this means
+	   that the symbol at this index will never be part of the input
+	   stream */
+	var index : int[MaxBits + 1]
+
+	std.assert(length.len <= hc.symbol.len, "invalid argument")
+	std.slfill(hc.count[:], 0)
+	for var symb = 0; symb < length.len; symb++
+		hc.count[length[symb]]++
+	;;
+	std.slfill(index[:], 0)
+	hc.count[0] = 0
+	for var l = 1; l <= MaxBits; l++
+		index[l] = index[l - 1] + hc.count[l - 1]
+	;;
+	for var symb = 0; symb < length.len; symb++
+		if length[symb] != 0
+			hc.symbol[index[length[symb]]++] = symb
+		;;
+	;;
+	/* TODO: validate the Huffman code */
+}
+
+const readcode = {st, hc
+	/* if you want to know more about the algorithm here, you
+	   must look for "canonical Huffman codes"; see, for example,
+	     Managing Gigabytes: Compressing and Indexing Documents
+	       and Images
+	     by I. H. Witten, A. Moffat, and T. C. Bell. */
+	var code, first, index, count
+
+	code = 0 /* code seen so far */
+	first = 0 /* first code of length len */
+	index = 0 /* index of the first code of length len in hc.symbol[:] */
+	for var len = 1; len <= MaxBits; len++
+		match bits(st, 1)
+		| `std.Ok n: code += n
+		| err: -> err
+		;;
+		count = hc.count[len]
+		if code < first + count
+			/* the code is exactly len bits long;
+			   return the symbol from the table */
+			-> `std.Ok (hc.symbol[index + (code - first)])
+		;;
+		index += count
+		first = (first + count) << 1 /* c.f. RFC Section 3.2.2 */
+		code <<= 1 /* make room for the next bit */
+	;;
+	-> `std.Err (`Format "invalid code")
+}
+
+const decomp = {st
+	/* process data of a compressed block; c.f., RFC Section 3.2.5 */
+	const basel = [
+		3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+		35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
+	]
+	const extral = [
+		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3,
+		3, 4, 4, 4, 4, 5, 5, 5, 5, 0
+	]
+	const based = [
+		1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+		257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+		8193, 12289, 16385, 24577
+	]
+	const extrad = [
+		0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+		8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+	]
+	var symb, len, dst, index
+
+	symb = 0
+	while symb != 256
+		match readcode(st, &st.lenhc)
+		| `std.Ok s: symb = s
+		| `std.Err e: -> `std.Err e
+		;;
+		if symb < 256
+			match outb(st, (symb : byte))
+			| `std.Ok _:
+			| err: -> err
+			;;
+		elif symb > 256
+			symb -= 257
+			match bits(st, extral[symb])
+			| `std.Ok x: len = basel[symb] + x
+			| `std.Err e: -> `std.Err e
+			;;
+			match readcode(st, &st.dsthc)
+			| `std.Ok s: symb = s
+			| `std.Err e: -> `std.Err e
+			;;
+			match bits(st, extrad[symb])
+			| `std.Ok x: dst = (based[symb] + x : std.size)
+			| `std.Err e: -> `std.Err e
+			;;
+			if dst > st.wlen
+				-> `std.Err (`Format \
+					"reference to a byte before file start")
+			;;
+			index = (WinSz + st.wpos - dst) % WinSz
+			for ; len > 0; len--
+				match outb(st, st.wbuf[index])
+				| `std.Ok _:
+				| err: -> err
+				;;
+				index = (index + 1) % WinSz
+			;;
+		;;
+	;;
+	-> `std.Ok void
+}
+
+const dynamic = {st
+	/* process a block compressed with dynamic Huffman codes;
+	   c.f., RFC Section 3.2.7 */
+	const coden = [
+		16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2,
+		14, 1, 15
+	]
+	const extra = [2, 3, 7]
+	var length : int[MaxLenSyms + MaxDstSyms]
+	var nlen, ndst, ncode, symb, nbits, hufc
+
+	match bits(st, 5)
+	| `std.Ok n: nlen = n + 257
+	| `std.Err e: -> `std.Err e
+	;;
+	if nlen > 286
+		-> `std.Err (`Format "invalid number of length codes")
+	;;
+	match bits(st, 5)
+	| `std.Ok n: ndst = n + 1
+	| `std.Err e: -> `std.Err e
+	;;
+	if ndst > 30
+		-> `std.Err (`Format "invalid number of distance codes")
+	;;
+	match bits(st, 4)
+	| `std.Ok n: ncode = n + 4
+	| `std.Err e: -> `std.Err e
+	;;
+	std.slfill(length[:19], 0)
+	for var c = 0; c < ncode; c++
+		match bits(st, 3)
+		| `std.Ok n: length[coden[c]] = n
+		| `std.Err e: -> `std.Err e
+		;;
+	;;
+	mkhufc(length[:19], &hufc)
+	for var c = 0; c < nlen + ndst;
+		/* according to the RFC: "the code length repeat codes
+		   can cross from HLIT + 257 to the HDIST + 1 code
+		   lengths"; this is why we have a single loop here */
+		match readcode(st, &hufc)
+		| `std.Ok n: symb = n
+		| `std.Err e: -> `std.Err e
+		;;
+		if symb > 18
+			-> `std.Err (`Format "invalid code code")
+		;;
+		if symb < 16
+			length[c++] = symb
+		else
+			/* repeat some code length */
+			match bits(st, extra[symb - 16])
+			| `std.Ok n: nbits = n
+			| `std.Err e: -> `std.Err e
+			;;
+			if symb == 16
+				/* copy the previous code length 3-6 times */
+				nbits += 3
+				if c == 0 || c + nbits > nlen + ndst
+					-> `std.Err (`Format \
+						"invalid code repetition")
+				;;
+				std.slfill(length[c:c+nbits], length[c-1])
+			else
+				/* repeat a 0 length */
+				if symb == 17
+					nbits += 3
+				else
+					nbits += 11
+				;;
+				if c + nbits > nlen + ndst
+					-> `std.Err (`Format \
+						"invalid zero repetition")
+				;;
+				std.slfill(length[c:c+nbits], 0)
+			;;
+			c += nbits
+		;;
+	;;
+	if length[256] == 0
+		-> `std.Err (`Format "no code length of end-of-block symbol")
+	;;
+	mkhufc(length[:nlen], &st.lenhc)
+	mkhufc(length[nlen:nlen + ndst], &st.dsthc)
+	-> decomp(st)
+}
+
+const decode = {outf, inf
+	var st, err, last
+
+	st = [
+		.rdf = inf,
+		.wrf = outf,
+		.bpos = 8,
+	]
+	last = 0
+	while last != 1
+		match bits(&st, 1)
+		| `std.Ok l: last = l
+		| `std.Err e: -> `std.Err e
+		;;
+		match bits(&st, 2)
+		| `std.Ok 0:
+			err = nocomp(&st)
+		| `std.Ok 1:
+			st.lenhc = fixedlenhc
+			st.dsthc = fixeddsthc
+			err = decomp(&st)
+		| `std.Ok 2:
+			err = dynamic(&st)
+		| `std.Ok _:
+			-> `std.Err (`Format "invalid block type")
+		| `std.Err e:
+			-> `std.Err e
+		;;
+		match err
+		| `std.Ok _:
+		| `std.Err e: -> `std.Err e
+		;;
+	;;
+	-> `std.Ok void
+}
--- /dev/null
+++ b/lib/flate/test/flate.myr
@@ -1,0 +1,295 @@
+use bio
+use flate
+use std
+use testr
+
+impl std.equatable std.result(int, flate.err) =
+	eq = {a, b
+		match (a, b)
+		| (`std.Ok na, `std.Ok nb): -> na == nb
+		| (`std.Err (`flate.Io ea), `std.Err (`flate.Io eb)): -> ea == eb
+		| (`std.Err (`flate.Format ea), `std.Err (`flate.Format eb)):
+			-> std.sleq(ea, eb)
+		| _: -> false
+		;;
+	}
+;;
+
+const init = {in -> flate.flatedec#
+	-> std.mk([
+		.rdf = bio.mkmem(in),
+		.bpos = 8,
+	])
+}
+
+const eof = `std.Err (`flate.Io `bio.Eof)
+
+const bits = {ctx
+	var st
+
+	st = init("")
+	testr.eq(ctx, flate.bits(st, 1), eof)
+
+	st = init(" ")
+	testr.eq(ctx, flate.bits(st, 9), eof)
+
+	/* simple bit reading */
+	st = init("\xaa")
+	testr.eq(ctx, flate.bits(st, 0), `std.Ok 0)
+	testr.eq(ctx, flate.bits(st, 1), `std.Ok 0)
+	testr.eq(ctx, flate.bits(st, 1), `std.Ok 1)
+	testr.eq(ctx, flate.bits(st, 2), `std.Ok 2)
+	testr.eq(ctx, flate.bits(st, 3), `std.Ok 2)
+	testr.eq(ctx, flate.bits(st, 2), eof)
+
+	/* reading across byte boundaries */
+	st = init("\xaa\xaa")
+	testr.eq(ctx, flate.bits(st, 2), `std.Ok 2)
+	testr.eq(ctx, flate.bits(st, 8+2), `std.Ok 0x2aa)
+	testr.eq(ctx, flate.bits(st, 1), `std.Ok 0)
+	testr.eq(ctx, flate.bits(st, 4), eof)
+
+	/* reading whole bytes */
+	st = init("\xab\xcd")
+	testr.eq(ctx, flate.bits(st, 8), `std.Ok 0xab)
+	testr.eq(ctx, flate.bits(st, 8), `std.Ok 0xcd)
+
+	/* refilling */
+	st = init("")
+	var n = 0
+	var pn = &n
+	st.rdf = bio.mk(bio.Rd, [
+		.read = {buf
+			buf[0] = (++pn# : byte)
+			-> `std.Ok 1
+		}
+	])
+	testr.eq(ctx, flate.bits(st, 3), `std.Ok 1)
+	testr.eq(ctx, flate.bits(st, 8), `std.Ok (2 << 5))
+	testr.eq(ctx, flate.bits(st, 10), `std.Ok (3 << 5))
+}
+
+const mkhufc = {ctx
+	var count
+	var ht
+
+	count = [
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+	][:]
+
+	flate.mkhufc([][:], &ht)
+	testr.check(ctx, std.eq(ht.count[:], count), "empty code")
+
+	flate.mkhufc([1, 1][:], &ht)
+	count[1] = 2
+	testr.check(ctx, std.eq(ht.count[:], count), "1-bit code")
+	testr.eq(ctx, ht.symbol[:2], [0, 1][:])
+
+	flate.mkhufc([2, 1, 3, 3][:], &ht)
+	std.slcp(count[1:4], [1, 1, 2][:])
+	testr.check(ctx, std.eq(ht.count[:], count), "rfc code")
+	testr.eq(ctx, ht.symbol[:4], [1, 0, 2, 3][:])
+}
+
+const readcode = {ctx
+	var st, ht
+
+	flate.mkhufc([2, 1, 3, 3][:], &ht)
+	st = init("\xcd\x05")
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 0)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 2)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 1)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 3)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 1)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 0)
+	testr.eq(ctx, st.bpos, 4)
+
+	/* same as above with some empty codes */
+	flate.mkhufc([2, 1, 0, 0, 3, 0, 3][:], &ht)
+	st = init("\xcd\x05")
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 0)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 4)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 1)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 6)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 1)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 0)
+	testr.eq(ctx, st.bpos, 4)
+
+	/* empty codes at the beginning */
+	flate.mkhufc([0, 0, 0, 4][:], &ht)
+	st = init("\x00")
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 3)
+	testr.eq(ctx, flate.readcode(st, &ht), `std.Ok 3)
+	testr.eq(ctx, flate.readcode(st, &ht), eof)
+}
+
+const bitsf = {str : byte[:]
+	var sl, n, b
+
+	sl = [][:]
+	n = 0
+	b = 0
+	for c : str
+		match (c : char)
+		| '0': n++
+		| '1': b += (1 << n); n++
+		| _: /* do nothing */
+		;;
+		if n == 8
+			std.slpush(&sl, (b : byte))
+			n = 0
+			b = 0
+		;;
+	;;
+	std.slpush(&sl, (b : byte))
+	-> bio.mkmem(sl)
+}
+
+const expect = {ctx, t, str
+	var exp
+
+	exp = std.mk(str)
+	const write = {buf
+		if buf.len > exp#.len
+			testr.fail(ctx,
+				"({}) more bytes written than expected:" \
+				" '{}' written when expecting '{}'",
+				t, buf, exp#)
+		;;
+		if !std.eq(buf, exp#[:buf.len])
+			testr.fail(ctx,
+				"({}) '{}' written when expecting '{}'",
+				t, buf, exp#[:buf.len])
+		;;
+		exp# = exp#[buf.len:]
+		-> `std.Ok buf.len
+	}
+	const close = {
+		if exp#.len > 0
+			testr.fail(ctx,
+				"({}) output closed when still expecting '{}'",
+				t, exp#)
+		;;
+		-> true
+	}
+	-> bio.mk(bio.Wr, [.write = write, .close = close])
+}
+
+type errortype = union
+	`None
+	`Eof
+	`Io
+	`Format
+;;
+
+const decode = {ctx
+	const tests = [
+		("empty",
+		 "",
+		 "", `Eof),
+		("reserved block",
+		 "1 11 00",
+		 "", `Format),
+		("partial block",
+		 "1 1",
+		 "", `Eof), /* could be `Format error... */
+		("uncompressed empty block",
+		 "1 00 00000 0000000000000000 1111111111111111",
+		 "", `None),
+		("uncompressed 3-bytes block",
+		 "1 00 00000 1100000000000000 0011111111111111" \
+		 "10000110 01000110 11000110",
+		 "abc", `None),
+		("uncompressed 1-byte then 2-bytes block",
+		 "0 00 00000 1000000000000000 0111111111111111" \
+		 "10000110" \
+		 "1 00 00000 0100000000000000 1011111111111111" \
+		 "01000110 11000110",
+		 "abc", `None),
+		("uncompressed mismatched len nlen",
+		 "1 00 00000 1000000000000000 0111111111111011" \
+		 "10000110",
+		 "", `Format),
+		("fixed code single block",
+		 "1 10 10011000 10010101 10011100 10011100" \
+		 "10011111 01010001 0000000",
+		 "hello!", `None),
+		("dynamic code block",
+		 // 'r' markers are used when bits must come in "reverse"
+		 "1 01" \
+		 "r00000 r00000 r1111" \           // 257 literal, 1 distance, 19 code codes
+		 "r101 101 101 101 101 101 101 101 101 101 101 101" \
+		 " 101 101 101 101 101 101 101" \  // All 14 code codes of length 5
+		 "10010 r0110101" \                // 97 zeroes
+		 "00011 00011 00011" \             // then three times 3
+		 "10010 r1111111 10010 r1110000" \ // then 156 (= 11+127 + 11+7) zeroes
+		 "00011" \                         // then 3 once (end of block)
+		 "00000" \                         // one unused length code
+		 "000 001 010 011",                // finally, some data
+		 "abc", `None),
+		("dynamic code block with reference",
+		 "1 01" \
+		 "r01000 r10000 r1111" \           // 258 literal, 2 distance, 19 code codes
+		 "r101 101 101 101 101 101 101 101 101 101 101 101" \
+		 " 101 101 101 101 101 101 101" \  // All 14 code codes of length 5
+		 "10010 r0110101" \                // 97 zeroes
+		 "00011 00011 00011" \             // then three times 3
+		 "10010 r1111111 10010 r1110000" \ // then 156 (= 11+127 + 11+7) zeroes
+		 "00011 00000 00011" \             // then 3, 0, 3 (ref of length 4)
+		 "00010 00010" \                   // two size two distance code
+		 "000 001 010 100 01 011",         // use the reference at the end
+		 "abcbcbc", `None),
+		("dynamic code with invalid reference",
+		 "1 01" \
+		 "r01000 r10000 r1111" \
+		 "r101 101 101 101 101 101 101 101 101 101 101 101" \
+		 " 101 101 101 101 101 101 101" \
+		 "10010 r0110101" \
+		 "00011 00011 00011" \
+		 "10010 r1111111 10010 r1110000" \
+		 "00011 00000 00011" \
+		 "00010 00010" \
+		 "000 100 01",
+		 "a", `Format),
+		("dynamic code with cross-blocks reference",
+		 "0 00 00000 1100000000000000 0011111111111111" \
+		 "10000110 01000110 11000110" \    // "abc" uncompressed
+		 "1 01" \
+		 "r01000 r00000 r1000" \           // 258 literal, 1 distance, 5 code codes
+		 "r110 110 110 000 010" \          // code lengths of 3, 3, 3, 0, 2
+		 \                                 // for 16, 17, 18, 0, 8
+		 "100 r1111111" \                  // 138 zeroes
+		 "100 r1101011" \                  // +118 zeroes = 256 zeroes
+		 "00" \                            // 256 (=EOB) has length 8
+		 "010 r00" \                       // repeat this length 3 times
+		 "00000001 00000000 00000000",     // one reference, then EOB
+		 "abcccc", `None),
+	]
+	var outf, err
+
+	for (t, in, out, experr) : tests
+		outf = expect(ctx, t, out)
+		err = flate.decode(outf, bitsf(in))
+		match (err, experr)
+		| (`std.Ok _, `None):
+		| (`std.Err (`flate.Io `bio.Eof), `Eof):
+		| (`std.Err (`flate.Io _), `Io):
+		| (`std.Err (`flate.Format _), `Format):
+		| _:
+			testr.fail(ctx,
+				"({}) {} error expected but got {} instead",
+				t, experr, err)
+		;;
+		bio.close(outf)
+	;;
+}
+
+const main = {
+	testr.run([
+		[.name = "bits", .fn = bits],
+		[.name = "mkhufc", .fn = mkhufc],
+		[.name = "readcode", .fn = readcode],
+		[.name = "decode", .fn = decode],
+	][:])
+}
--- /dev/null
+++ b/lib/flate/types.myr
@@ -1,0 +1,8 @@
+use bio
+
+pkg flate =
+	type err = union
+		`Io bio.err
+		`Format byte[:]
+	;;
+;;