Rename Makefile targets

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alex@openfaas.com>
2025-06-23 07:13:23 +00:00 · 2023-10-23 11:29:19 +01:00
parent 479285caf6
commit 9ba4a73d5d
350 changed files with 22981 additions and 3972 deletions
--- a/gateway/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/deflate.go
--- a/gateway/vendor/github.com/klauspost/compress/flate/dict_decoder.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@ -0,0 +1,184 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
+// LZ77 decompresses data through sequences of two forms of commands:
+//
+//   - Literal insertions: Runs of one or more symbols are inserted into the data
+//     stream as is. This is accomplished through the writeByte method for a
+//     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+//     Any valid stream must start with a literal insertion if no preset dictionary
+//     is used.
+//
+//   - Backward copies: Runs of one or more symbols are copied from previously
+//     emitted data. Backward copies come as the tuple (dist, length) where dist
+//     determines how far back in the stream to copy from and length determines how
+//     many bytes to copy. Note that it is valid for the length to be greater than
+//     the distance. Since LZ77 uses forward copies, that situation is used to
+//     perform a form of run-length encoding on repeated runs of symbols.
+//     The writeCopy and tryWriteCopy are used to implement this command.
+//
+// For performance reasons, this implementation performs little to no sanity
+// checks about the arguments. As such, the invariants documented for each
+// method call must be respected.
+type dictDecoder struct {
+	hist []byte // Sliding window history
+
+	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
+	wrPos int  // Current output position in buffer
+	rdPos int  // Have emitted hist[:rdPos] already
+	full  bool // Has a full window length been written yet?
+}
+
+// init initializes dictDecoder to have a sliding window dictionary of the given
+// size. If a preset dict is provided, it will initialize the dictionary with
+// the contents of dict.
+func (dd *dictDecoder) init(size int, dict []byte) {
+	*dd = dictDecoder{hist: dd.hist}
+
+	if cap(dd.hist) < size {
+		dd.hist = make([]byte, size)
+	}
+	dd.hist = dd.hist[:size]
+
+	if len(dict) > len(dd.hist) {
+		dict = dict[len(dict)-len(dd.hist):]
+	}
+	dd.wrPos = copy(dd.hist, dict)
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos = 0
+		dd.full = true
+	}
+	dd.rdPos = dd.wrPos
+}
+
+// histSize reports the total amount of historical data in the dictionary.
+func (dd *dictDecoder) histSize() int {
+	if dd.full {
+		return len(dd.hist)
+	}
+	return dd.wrPos
+}
+
+// availRead reports the number of bytes that can be flushed by readFlush.
+func (dd *dictDecoder) availRead() int {
+	return dd.wrPos - dd.rdPos
+}
+
+// availWrite reports the available amount of output buffer space.
+func (dd *dictDecoder) availWrite() int {
+	return len(dd.hist) - dd.wrPos
+}
+
+// writeSlice returns a slice of the available buffer to write data to.
+//
+// This invariant will be kept: len(s) <= availWrite()
+func (dd *dictDecoder) writeSlice() []byte {
+	return dd.hist[dd.wrPos:]
+}
+
+// writeMark advances the writer pointer by cnt.
+//
+// This invariant must be kept: 0 <= cnt <= availWrite()
+func (dd *dictDecoder) writeMark(cnt int) {
+	dd.wrPos += cnt
+}
+
+// writeByte writes a single byte to the dictionary.
+//
+// This invariant must be kept: 0 < availWrite()
+func (dd *dictDecoder) writeByte(c byte) {
+	dd.hist[dd.wrPos] = c
+	dd.wrPos++
+}
+
+// writeCopy copies a string at a given (dist, length) to the output.
+// This returns the number of bytes copied and may be less than the requested
+// length if the available space in the output buffer is too small.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) writeCopy(dist, length int) int {
+	dstBase := dd.wrPos
+	dstPos := dstBase
+	srcPos := dstPos - dist
+	endPos := dstPos + length
+	if endPos > len(dd.hist) {
+		endPos = len(dd.hist)
+	}
+
+	// Copy non-overlapping section after destination position.
+	//
+	// This section is non-overlapping in that the copy length for this section
+	// is always less than or equal to the backwards distance. This can occur
+	// if a distance refers to data that wraps-around in the buffer.
+	// Thus, a backwards copy is performed here; that is, the exact bytes in
+	// the source prior to the copy is placed in the destination.
+	if srcPos < 0 {
+		srcPos += len(dd.hist)
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
+		srcPos = 0
+	}
+
+	// Copy possibly overlapping section before destination position.
+	//
+	// This section can overlap if the copy length for this section is larger
+	// than the backwards distance. This is allowed by LZ77 so that repeated
+	// strings can be succinctly represented using (dist, length) pairs.
+	// Thus, a forwards copy is performed here; that is, the bytes copied is
+	// possibly dependent on the resulting bytes in the destination as the copy
+	// progresses along. This is functionally equivalent to the following:
+	//
+	//	for i := 0; i < endPos-dstPos; i++ {
+	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
+	//	}
+	//	dstPos = endPos
+	//
+	for dstPos < endPos {
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// tryWriteCopy tries to copy a string at a given (distance, length) to the
+// output. This specialized version is optimized for short distances.
+//
+// This method is designed to be inlined for performance reasons.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
+	dstPos := dd.wrPos
+	endPos := dstPos + length
+	if dstPos < dist || endPos > len(dd.hist) {
+		return 0
+	}
+	dstBase := dstPos
+	srcPos := dstPos - dist
+
+	// Copy possibly overlapping section before destination position.
+loop:
+	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	if dstPos < endPos {
+		goto loop // Avoid for-loop so that this function can be inlined
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// readFlush returns a slice of the historical buffer that is ready to be
+// emitted to the user. The data returned by readFlush must be fully consumed
+// before calling any other dictDecoder methods.
+func (dd *dictDecoder) readFlush() []byte {
+	toRead := dd.hist[dd.rdPos:dd.wrPos]
+	dd.rdPos = dd.wrPos
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos, dd.rdPos = 0, 0
+		dd.full = true
+	}
+	return toRead
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@ -0,0 +1,193 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Modified for deflate by Klaus Post (c) 2015.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+)
+
+type fastEnc interface {
+	Encode(dst *tokens, src []byte)
+	Reset()
+}
+
+func newFastEnc(level int) fastEnc {
+	switch level {
+	case 1:
+		return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 2:
+		return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 3:
+		return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 4:
+		return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 5:
+		return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 6:
+		return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
+	default:
+		panic("invalid level specified")
+	}
+}
+
+const (
+	tableBits       = 15             // Bits used in the table
+	tableSize       = 1 << tableBits // Size of the table
+	tableShift      = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
+	baseMatchOffset = 1              // The smallest match offset
+	baseMatchLength = 3              // The smallest match length per the RFC section 3.2.5
+	maxMatchOffset  = 1 << 15        // The largest match offset
+
+	bTableBits   = 17                                               // Bits used in the big tables
+	bTableSize   = 1 << bTableBits                                  // Size of the table
+	allocHistory = maxStoreBlockSize * 5                            // Size to preallocate for history.
+	bufferReset  = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
+)
+
+const (
+	prime3bytes = 506832829
+	prime4bytes = 2654435761
+	prime5bytes = 889523592379
+	prime6bytes = 227718039650203
+	prime7bytes = 58295818150454627
+	prime8bytes = 0xcf1bbcdcb7a56463
+)
+
+func load3232(b []byte, i int32) uint32 {
+	return binary.LittleEndian.Uint32(b[i:])
+}
+
+func load6432(b []byte, i int32) uint64 {
+	return binary.LittleEndian.Uint64(b[i:])
+}
+
+type tableEntry struct {
+	offset int32
+}
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastGen struct {
+	hist []byte
+	cur  int32
+}
+
+func (e *fastGen) addBlock(src []byte) int32 {
+	// check if we have space already
+	if len(e.hist)+len(src) > cap(e.hist) {
+		if cap(e.hist) == 0 {
+			e.hist = make([]byte, 0, allocHistory)
+		} else {
+			if cap(e.hist) < maxMatchOffset*2 {
+				panic("unexpected buffer size")
+			}
+			// Move down
+			offset := int32(len(e.hist)) - maxMatchOffset
+			// copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+			*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
+			e.cur += offset
+			e.hist = e.hist[:maxMatchOffset]
+		}
+	}
+	s := int32(len(e.hist))
+	e.hist = append(e.hist, src...)
+	return s
+}
+
+type tableEntryPrev struct {
+	Cur  tableEntry
+	Prev tableEntry
+}
+
+// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64.
+func hash7(u uint64, h uint8) uint32 {
+	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
+}
+
+// hashLen returns a hash of the lowest mls bytes of with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
+	switch mls {
+	case 3:
+		return (uint32(u<<8) * prime3bytes) >> (32 - length)
+	case 5:
+		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
+	case 6:
+		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
+	case 7:
+		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
+	case 8:
+		return uint32((u * prime8bytes) >> (64 - length))
+	default:
+		return (uint32(u) * prime4bytes) >> (32 - length)
+	}
+}
+
+// matchlen will return the match length between offsets and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
+	if debugDecode {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > maxMatchOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	s1 := int(s) + maxMatchLength - 4
+	if s1 > len(src) {
+		s1 = len(src)
+	}
+
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets and t in src.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
+	if debugDeflate {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > maxMatchOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastGen) Reset() {
+	if cap(e.hist) < allocHistory {
+		e.hist = make([]byte, 0, allocHistory)
+	}
+	// We offset current position so everything will be out of reach.
+	// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+	if e.cur <= bufferReset {
+		e.cur += maxMatchOffset + int32(len(e.hist))
+	}
+	e.hist = e.hist[:0]
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
--- a/gateway/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/huffman_code.go
@ -0,0 +1,417 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"math"
+	"math/bits"
+)
+
+const (
+	maxBitsLimit = 16
+	// number of valid literals
+	literalCount = 286
+)
+
+// hcode is a huffman code with a bit code and bit length.
+type hcode uint32
+
+func (h hcode) len() uint8 {
+	return uint8(h)
+}
+
+func (h hcode) code64() uint64 {
+	return uint64(h >> 8)
+}
+
+func (h hcode) zero() bool {
+	return h == 0
+}
+
+type huffmanEncoder struct {
+	codes    []hcode
+	bitCount [17]int32
+
+	// Allocate a reusable buffer with the longest possible frequency table.
+	// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+	// The largest of these is literalCount, so we allocate for that case.
+	freqcache [literalCount + 1]literalNode
+}
+
+type literalNode struct {
+	literal uint16
+	freq    uint16
+}
+
+// A levelInfo describes the state of the constructed tree for a given depth.
+type levelInfo struct {
+	// Our level.  for better printing
+	level int32
+
+	// The frequency of the last node at this level
+	lastFreq int32
+
+	// The frequency of the next character to add to this level
+	nextCharFreq int32
+
+	// The frequency of the next pair (from level below) to add to this level.
+	// Only valid if the "needed" value of the next lower level is 0.
+	nextPairFreq int32
+
+	// The number of chains remaining to generate for this level before moving
+	// up to the next level
+	needed int32
+}
+
+// set sets the code and length of an hcode.
+func (h *hcode) set(code uint16, length uint8) {
+	*h = hcode(length) | (hcode(code) << 8)
+}
+
+func newhcode(code uint16, length uint8) hcode {
+	return hcode(length) | (hcode(code) << 8)
+}
+
+func reverseBits(number uint16, bitLength byte) uint16 {
+	return bits.Reverse16(number << ((16 - bitLength) & 15))
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
+
+func newHuffmanEncoder(size int) *huffmanEncoder {
+	// Make capacity to next power of two.
+	c := uint(bits.Len32(uint32(size - 1)))
+	return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
+}
+
+// Generates a HuffmanCode corresponding to the fixed literal table
+func generateFixedLiteralEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(literalCount)
+	codes := h.codes
+	var ch uint16
+	for ch = 0; ch < literalCount; ch++ {
+		var bits uint16
+		var size uint8
+		switch {
+		case ch < 144:
+			// size 8, 000110000  .. 10111111
+			bits = ch + 48
+			size = 8
+		case ch < 256:
+			// size 9, 110010000 .. 111111111
+			bits = ch + 400 - 144
+			size = 9
+		case ch < 280:
+			// size 7, 0000000 .. 0010111
+			bits = ch - 256
+			size = 7
+		default:
+			// size 8, 11000000 .. 11000111
+			bits = ch + 192 - 280
+			size = 8
+		}
+		codes[ch] = newhcode(reverseBits(bits, size), size)
+	}
+	return h
+}
+
+func generateFixedOffsetEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(30)
+	codes := h.codes
+	for ch := range codes {
+		codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
+	}
+	return h
+}
+
+var fixedLiteralEncoding = generateFixedLiteralEncoding()
+var fixedOffsetEncoding = generateFixedOffsetEncoding()
+
+func (h *huffmanEncoder) bitLength(freq []uint16) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			total += int(f) * int(h.codes[i].len())
+		}
+	}
+	return total
+}
+
+func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
+	var total int
+	for _, f := range b {
+		total += int(h.codes[f].len())
+	}
+	return total
+}
+
+// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
+func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			code := h.codes[i]
+			if code.zero() {
+				return math.MaxInt32
+			}
+			total += int(f) * int(code.len())
+		}
+	}
+	return total
+}
+
+// Return the number of literals assigned to each bit size in the Huffman encoding
+//
+// This method is only called when list.length >= 3
+// The cases of 0, 1, and 2 literals are handled by special case code.
+//
+// list  An array of the literals with non-zero frequencies
+//
+//	and their associated frequencies. The array is in order of increasing
+//	frequency, and has as its last element a special element with frequency
+//	MaxInt32
+//
+// maxBits     The maximum number of bits that should be used to encode any literal.
+//
+//	Must be less than 16.
+//
+// return      An integer array in which array[i] indicates the number of literals
+//
+//	that should be encoded in i bits.
+func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
+	if maxBits >= maxBitsLimit {
+		panic("flate: maxBits too large")
+	}
+	n := int32(len(list))
+	list = list[0 : n+1]
+	list[n] = maxNode()
+
+	// The tree can't have greater depth than n - 1, no matter what. This
+	// saves a little bit of work in some small cases
+	if maxBits > n-1 {
+		maxBits = n - 1
+	}
+
+	// Create information about each of the levels.
+	// A bogus "Level 0" whose sole purpose is so that
+	// level1.prev.needed==0.  This makes level1.nextPairFreq
+	// be a legitimate value that never gets chosen.
+	var levels [maxBitsLimit]levelInfo
+	// leafCounts[i] counts the number of literals at the left
+	// of ancestors of the rightmost node at level i.
+	// leafCounts[i][j] is the number of literals at the left
+	// of the level j ancestor.
+	var leafCounts [maxBitsLimit][maxBitsLimit]int32
+
+	// Descending to only have 1 bounds check.
+	l2f := int32(list[2].freq)
+	l1f := int32(list[1].freq)
+	l0f := int32(list[0].freq) + int32(list[1].freq)
+
+	for level := int32(1); level <= maxBits; level++ {
+		// For every level, the first two items are the first two characters.
+		// We initialize the levels as if we had already figured this out.
+		levels[level] = levelInfo{
+			level:        level,
+			lastFreq:     l1f,
+			nextCharFreq: l2f,
+			nextPairFreq: l0f,
+		}
+		leafCounts[level][level] = 2
+		if level == 1 {
+			levels[level].nextPairFreq = math.MaxInt32
+		}
+	}
+
+	// We need a total of 2*n - 2 items at top level and have already generated 2.
+	levels[maxBits].needed = 2*n - 4
+
+	level := uint32(maxBits)
+	for level < 16 {
+		l := &levels[level]
+		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
+			// We've run out of both leafs and pairs.
+			// End all calculations for this level.
+			// To make sure we never come back to this level or any lower level,
+			// set nextPairFreq impossibly large.
+			l.needed = 0
+			levels[level+1].nextPairFreq = math.MaxInt32
+			level++
+			continue
+		}
+
+		prevFreq := l.lastFreq
+		if l.nextCharFreq < l.nextPairFreq {
+			// The next item on this row is a leaf node.
+			n := leafCounts[level][level] + 1
+			l.lastFreq = l.nextCharFreq
+			// Lower leafCounts are the same of the previous node.
+			leafCounts[level][level] = n
+			e := list[n]
+			if e.literal < math.MaxUint16 {
+				l.nextCharFreq = int32(e.freq)
+			} else {
+				l.nextCharFreq = math.MaxInt32
+			}
+		} else {
+			// The next item on this row is a pair from the previous row.
+			// nextPairFreq isn't valid until we generate two
+			// more values in the level below
+			l.lastFreq = l.nextPairFreq
+			// Take leaf counts from the lower level, except counts[level] remains the same.
+			if true {
+				save := leafCounts[level][level]
+				leafCounts[level] = leafCounts[level-1]
+				leafCounts[level][level] = save
+			} else {
+				copy(leafCounts[level][:level], leafCounts[level-1][:level])
+			}
+			levels[l.level-1].needed = 2
+		}
+
+		if l.needed--; l.needed == 0 {
+			// We've done everything we need to do for this level.
+			// Continue calculating one level up. Fill in nextPairFreq
+			// of that level with the sum of the two nodes we've just calculated on
+			// this level.
+			if l.level == maxBits {
+				// All done!
+				break
+			}
+			levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
+			level++
+		} else {
+			// If we stole from below, move down temporarily to replenish it.
+			for levels[level-1].needed > 0 {
+				level--
+			}
+		}
+	}
+
+	// Somethings is wrong if at the end, the top level is null or hasn't used
+	// all of the leaves.
+	if leafCounts[maxBits][maxBits] != n {
+		panic("leafCounts[maxBits][maxBits] != n")
+	}
+
+	bitCount := h.bitCount[:maxBits+1]
+	bits := 1
+	counts := &leafCounts[maxBits]
+	for level := maxBits; level > 0; level-- {
+		// chain.leafCount gives the number of literals requiring at least "bits"
+		// bits to encode.
+		bitCount[bits] = counts[level] - counts[level-1]
+		bits++
+	}
+	return bitCount
+}
+
+// Look at the leaves and assign them a bit count and an encoding as specified
+// in RFC 1951 3.2.2
+func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
+	code := uint16(0)
+	for n, bits := range bitCount {
+		code <<= 1
+		if n == 0 || bits == 0 {
+			continue
+		}
+		// The literals list[len(list)-bits] .. list[len(list)-bits]
+		// are encoded using "bits" bits, and get the values
+		// code, code + 1, ....  The code values are
+		// assigned in literal order (not frequency order).
+		chunk := list[len(list)-int(bits):]
+
+		sortByLiteral(chunk)
+		for _, node := range chunk {
+			h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
+			code++
+		}
+		list = list[0 : len(list)-int(bits)]
+	}
+}
+
+// Update this Huffman Code object to be the minimum code for the specified frequency count.
+//
+// freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
+// maxBits  The maximum number of bits to use for any literal.
+func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
+	list := h.freqcache[:len(freq)+1]
+	codes := h.codes[:len(freq)]
+	// Number of non-zero literals
+	count := 0
+	// Set list to be the set of all non-zero literals and their frequencies
+	for i, f := range freq {
+		if f != 0 {
+			list[count] = literalNode{uint16(i), f}
+			count++
+		} else {
+			codes[i] = 0
+		}
+	}
+	list[count] = literalNode{}
+
+	list = list[:count]
+	if count <= 2 {
+		// Handle the small cases here, because they are awkward for the general case code. With
+		// two or fewer literals, everything has bit length 1.
+		for i, node := range list {
+			// "list" is in order of increasing literal value.
+			h.codes[node.literal].set(uint16(i), 1)
+		}
+		return
+	}
+	sortByFreq(list)
+
+	// Get the number of literals for each bit count
+	bitCount := h.bitCounts(list, maxBits)
+	// And do the assignment
+	h.assignEncodingAndSize(bitCount, list)
+}
+
+// atLeastOne clamps the result between 1 and 15.
+func atLeastOne(v float32) float32 {
+	if v < 1 {
+		return 1
+	}
+	if v > 15 {
+		return 15
+	}
+	return v
+}
+
+func histogram(b []byte, h []uint16) {
+	if true && len(b) >= 8<<10 {
+		// Split for bigger inputs
+		histogramSplit(b, h)
+	} else {
+		h = h[:256]
+		for _, t := range b {
+			h[t]++
+		}
+	}
+}
+
+func histogramSplit(b []byte, h []uint16) {
+	// Tested, and slightly faster than 2-way.
+	// Writing to separate arrays and combining is also slightly slower.
+	h = h[:256]
+	for len(b)&3 != 0 {
+		h[b[0]]++
+		b = b[1:]
+	}
+	n := len(b) / 4
+	x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
+	y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
+	for i, t := range x {
+		v0 := &h[t]
+		v1 := &h[y[i]]
+		v3 := &h[w[i]]
+		v2 := &h[z[i]]
+		*v0++
+		*v1++
+		*v2++
+		*v3++
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
@ -0,0 +1,159 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// Sort sorts data.
+// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
+// data.Less and data.Swap. The sort is not guaranteed to be stable.
+func sortByFreq(data []literalNode) {
+	n := len(data)
+	quickSortByFreq(data, 0, n, maxDepth(n))
+}
+
+func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSort(data, a, b)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivotByFreq(data, a, b)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSortByFreq(data, a, mlo, maxDepth)
+			a = mhi // i.e., quickSortByFreq(data, mhi, b)
+		} else {
+			quickSortByFreq(data, mhi, b, maxDepth)
+			b = mlo // i.e., quickSortByFreq(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form cause b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSortByFreq(data, a, b)
+	}
+}
+
+func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's ``Ninther,'' median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
+		medianOfThreeSortByFreq(data, m, m-s, m+s)
+		medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
+	}
+	medianOfThreeSortByFreq(data, lo, m, hi-1)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
+	}
+	b := a
+	for {
+		for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
+		}
+		for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Lets test some points for equality to pivot
+		dups := 0
+		if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
+			}
+			for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+// Insertion sort
+func insertionSortByFreq(data []literalNode, a, b int) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// quickSortByFreq, loosely following Bentley and McIlroy,
+// ``Engineering a Sort Function,'' SP&E November 1993.
+
+// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
+	// sort 3 elements
+	if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go
@ -0,0 +1,201 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// Sort sorts data.
+// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
+// data.Less and data.Swap. The sort is not guaranteed to be stable.
+func sortByLiteral(data []literalNode) {
+	n := len(data)
+	quickSort(data, 0, n, maxDepth(n))
+}
+
+func quickSort(data []literalNode, a, b, maxDepth int) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSort(data, a, b)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivot(data, a, b)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSort(data, a, mlo, maxDepth)
+			a = mhi // i.e., quickSort(data, mhi, b)
+		} else {
+			quickSort(data, mhi, b, maxDepth)
+			b = mlo // i.e., quickSort(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form cause b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if data[i].literal < data[i-6].literal {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSort(data, a, b)
+	}
+}
+func heapSort(data []literalNode, a, b int) {
+	first := a
+	lo := 0
+	hi := b - a
+
+	// Build heap with greatest element at top.
+	for i := (hi - 1) / 2; i >= 0; i-- {
+		siftDown(data, i, hi, first)
+	}
+
+	// Pop elements, largest first, into end of data.
+	for i := hi - 1; i >= 0; i-- {
+		data[first], data[first+i] = data[first+i], data[first]
+		siftDown(data, lo, i, first)
+	}
+}
+
+// siftDown implements the heap property on data[lo, hi).
+// first is an offset into the array where the root of the heap lies.
+func siftDown(data []literalNode, lo, hi, first int) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
+			child++
+		}
+		if data[first+root].literal > data[first+child].literal {
+			return
+		}
+		data[first+root], data[first+child] = data[first+child], data[first+root]
+		root = child
+	}
+}
+func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's ``Ninther,'' median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThree(data, lo, lo+s, lo+2*s)
+		medianOfThree(data, m, m-s, m+s)
+		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
+	}
+	medianOfThree(data, lo, m, hi-1)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && data[a].literal < data[pivot].literal; a++ {
+	}
+	b := a
+	for {
+		for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
+		}
+		for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Lets test some points for equality to pivot
+		dups := 0
+		if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if data[m].literal > data[pivot].literal { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
+			}
+			for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+// Insertion sort
+func insertionSort(data []literalNode, a, b int) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// maxDepth returns a threshold at which quicksort should switch
+// to heapsort. It returns 2*ceil(lg(n+1)).
+func maxDepth(n int) int {
+	var depth int
+	for i := n; i > 0; i >>= 1 {
+		depth++
+	}
+	return depth * 2
+}
+
+// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThree(data []literalNode, m1, m0, m2 int) {
+	// sort 3 elements
+	if data[m1].literal < data[m0].literal {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if data[m2].literal < data[m1].literal {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if data[m1].literal < data[m0].literal {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/inflate.go
@ -0,0 +1,829 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flate implements the DEFLATE compressed data format, described in
+// RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
+// formats.
+package flate
+
+import (
+	"bufio"
+	"compress/flate"
+	"fmt"
+	"io"
+	"math/bits"
+	"sync"
+)
+
+const (
+	maxCodeLen     = 16 // max length of Huffman code
+	maxCodeLenMask = 15 // mask for max length of Huffman code
+	// The next three numbers come from the RFC section 3.2.7, with the
+	// additional proviso in section 3.2.5 which implies that distance codes
+	// 30 and 31 should never occur in compressed data.
+	maxNumLit  = 286
+	maxNumDist = 30
+	numCodes   = 19 // number of codes in Huffman meta-code
+
+	debugDecode = false
+)
+
+// Value of length - 3 and extra bits.
+type lengthExtra struct {
+	length, extra uint8
+}
+
+var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
+
+var bitMask32 = [32]uint32{
+	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+	0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 32 bits
+
+// Initialize the fixedHuffmanDecoder only once upon first use.
+var fixedOnce sync.Once
+var fixedHuffmanDecoder huffmanDecoder
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError = flate.CorruptInputError
+
+// An InternalError reports an error in the flate code itself.
+type InternalError string
+
+func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
+
+// A ReadError reports an error encountered while reading input.
+//
+// Deprecated: No longer returned.
+type ReadError = flate.ReadError
+
+// A WriteError reports an error encountered while writing output.
+//
+// Deprecated: No longer returned.
+type WriteError = flate.WriteError
+
+// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
+// to switch to a new underlying Reader. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+	// Reset discards any buffered data and resets the Resetter as if it was
+	// newly initialized with the given reader.
+	Reset(r io.Reader, dict []byte) error
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+//	http://www.gzip.org/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	maxRead  int                       // the maximum number of bits we can read and not overread
+	chunks   *[huffmanNumChunks]uint16 // chunks as described above
+	links    [][]uint16                // overflow links
+	linkMask uint32                    // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.chunks == nil {
+		h.chunks = new([huffmanNumChunks]uint16)
+	}
+
+	if h.maxRead != 0 {
+		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
+	}
+
+	// Count number of codes of each length,
+	// compute maxRead and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n&maxCodeLenMask]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i&maxCodeLenMask] = code
+		code += count[i&maxCodeLenMask]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && !(code == 1 && max == 1) {
+		if debugDecode {
+			fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
+		}
+		return false
+	}
+
+	h.maxRead = min
+
+	chunks := h.chunks[:]
+	for i := range chunks {
+		chunks[i] = 0
+	}
+
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		if cap(h.links) < huffmanNumChunks-link {
+			h.links = make([][]uint16, huffmanNumChunks-link)
+		} else {
+			h.links = h.links[:huffmanNumChunks-link]
+		}
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			if cap(h.links[off]) < numLinks {
+				h.links[off] = make([]uint16, numLinks)
+			} else {
+				h.links[off] = h.links[off][:numLinks]
+			}
+		}
+	} else {
+		h.links = h.links[:0]
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint16(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry. Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// Reader is the actual read interface needed by NewReader.
+// If the passed in io.Reader does not also have ReadByte,
+// the NewReader will introduce its own buffering.
+type Reader interface {
+	io.Reader
+	io.ByteReader
+}
+
+type step uint8
+
+const (
+	copyData step = iota + 1
+	nextBlock
+	huffmanBytesBuffer
+	huffmanBytesReader
+	huffmanBufioReader
+	huffmanStringsReader
+	huffmanGenericReader
+)
+
+// Decompress state.
+type decompressor struct {
+	// Input source.
+	r       Reader
+	roffset int64
+
+	// Huffman decoders for literal/length, distance.
+	h1, h2 huffmanDecoder
+
+	// Length arrays used to define Huffman codes.
+	bits     *[maxNumLit + maxNumDist]int
+	codebits *[numCodes]int
+
+	// Output history, buffer.
+	dict dictDecoder
+
+	// Next step in the decompression,
+	// and decompression state.
+	step      step
+	stepState int
+	err       error
+	toRead    []byte
+	hl, hd    *huffmanDecoder
+	copyLen   int
+	copyDist  int
+
+	// Temporary buffer (avoids repeated allocation).
+	buf [4]byte
+
+	// Input bits, in top of b.
+	b uint32
+
+	nb    uint
+	final bool
+}
+
+func (f *decompressor) nextBlock() {
+	for f.nb < 1+2 {
+		if f.err = f.moreBits(); f.err != nil {
+			return
+		}
+	}
+	f.final = f.b&1 == 1
+	f.b >>= 1
+	typ := f.b & 3
+	f.b >>= 2
+	f.nb -= 1 + 2
+	switch typ {
+	case 0:
+		f.dataBlock()
+		if debugDecode {
+			fmt.Println("stored block")
+		}
+	case 1:
+		// compressed, fixed Huffman tables
+		f.hl = &fixedHuffmanDecoder
+		f.hd = nil
+		f.huffmanBlockDecoder()
+		if debugDecode {
+			fmt.Println("predefinied huffman block")
+		}
+	case 2:
+		// compressed, dynamic Huffman tables
+		if f.err = f.readHuffman(); f.err != nil {
+			break
+		}
+		f.hl = &f.h1
+		f.hd = &f.h2
+		f.huffmanBlockDecoder()
+		if debugDecode {
+			fmt.Println("dynamic huffman block")
+		}
+	default:
+		// 3 is reserved.
+		if debugDecode {
+			fmt.Println("reserved data block encountered")
+		}
+		f.err = CorruptInputError(f.roffset)
+	}
+}
+
+func (f *decompressor) Read(b []byte) (int, error) {
+	for {
+		if len(f.toRead) > 0 {
+			n := copy(b, f.toRead)
+			f.toRead = f.toRead[n:]
+			if len(f.toRead) == 0 {
+				return n, f.err
+			}
+			return n, nil
+		}
+		if f.err != nil {
+			return 0, f.err
+		}
+
+		f.doStep()
+
+		if f.err != nil && len(f.toRead) == 0 {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+		}
+	}
+}
+
+// WriteTo implements the io.WriteTo interface for io.Copy and friends.
+func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
+	total := int64(0)
+	flushed := false
+	for {
+		if len(f.toRead) > 0 {
+			n, err := w.Write(f.toRead)
+			total += int64(n)
+			if err != nil {
+				f.err = err
+				return total, err
+			}
+			if n != len(f.toRead) {
+				return total, io.ErrShortWrite
+			}
+			f.toRead = f.toRead[:0]
+		}
+		if f.err != nil && flushed {
+			if f.err == io.EOF {
+				return total, nil
+			}
+			return total, f.err
+		}
+		if f.err == nil {
+			f.doStep()
+		}
+		if len(f.toRead) == 0 && f.err != nil && !flushed {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+			flushed = true
+		}
+	}
+}
+
+func (f *decompressor) Close() error {
+	if f.err == io.EOF {
+		return nil
+	}
+	return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() error {
+	// HLIT[5], HDIST[5], HCLEN[4].
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		if debugDecode {
+			fmt.Println("nlit > maxNumLit", nlit)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		if debugDecode {
+			fmt.Println("ndist > maxNumDist", ndist)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+
+	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		f.codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	for i := nclen; i < len(codeOrder); i++ {
+		f.codebits[codeOrder[i]] = 0
+	}
+	if !f.h1.init(f.codebits[0:]) {
+		if debugDecode {
+			fmt.Println("init codebits failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// HLIT + 257 code lengths, HDIST + 1 code lengths,
+	// using the code length Huffman code.
+	for i, n := 0, nlit+ndist; i < n; {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		if x < 16 {
+			// Actual length.
+			f.bits[i] = x
+			i++
+			continue
+		}
+		// Repeat previous length or zero.
+		var rep int
+		var nb uint
+		var b int
+		switch x {
+		default:
+			return InternalError("unexpected length code")
+		case 16:
+			rep = 3
+			nb = 2
+			if i == 0 {
+				if debugDecode {
+					fmt.Println("i==0")
+				}
+				return CorruptInputError(f.roffset)
+			}
+			b = f.bits[i-1]
+		case 17:
+			rep = 3
+			nb = 3
+			b = 0
+		case 18:
+			rep = 11
+			nb = 7
+			b = 0
+		}
+		for f.nb < nb {
+			if err := f.moreBits(); err != nil {
+				if debugDecode {
+					fmt.Println("morebits:", err)
+				}
+				return err
+			}
+		}
+		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
+		f.b >>= nb & regSizeMaskUint32
+		f.nb -= nb
+		if i+rep > n {
+			if debugDecode {
+				fmt.Println("i+rep > n", i, rep, n)
+			}
+			return CorruptInputError(f.roffset)
+		}
+		for j := 0; j < rep; j++ {
+			f.bits[i] = b
+			i++
+		}
+	}
+
+	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+		if debugDecode {
+			fmt.Println("init2 failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// As an optimization, we can initialize the maxRead bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+	if f.h1.maxRead < f.bits[endBlockMarker] {
+		f.h1.maxRead = f.bits[endBlockMarker]
+	}
+	if !f.final {
+		// If not the final block, the smallest block possible is
+		// a predefined table, BTYPE=01, with a single EOB marker.
+		// This will take up 3 + 7 bits.
+		f.h1.maxRead += 10
+	}
+
+	return nil
+}
+
+// Copy a single uncompressed data block from input to output.
+func (f *decompressor) dataBlock() {
+	// Uncompressed.
+	// Discard current half-byte.
+	left := (f.nb) & 7
+	f.nb -= left
+	f.b >>= left
+
+	offBytes := f.nb >> 3
+	// Unfilled values will be overwritten.
+	f.buf[0] = uint8(f.b)
+	f.buf[1] = uint8(f.b >> 8)
+	f.buf[2] = uint8(f.b >> 16)
+	f.buf[3] = uint8(f.b >> 24)
+
+	f.roffset += int64(offBytes)
+	f.nb, f.b = 0, 0
+
+	// Length then ones-complement of length.
+	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
+	f.roffset += int64(nr)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
+	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
+	if nn != ^n {
+		if debugDecode {
+			ncomp := ^n
+			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
+		}
+		f.err = CorruptInputError(f.roffset)
+		return
+	}
+
+	if n == 0 {
+		f.toRead = f.dict.readFlush()
+		f.finishBlock()
+		return
+	}
+
+	f.copyLen = int(n)
+	f.copyData()
+}
+
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
+func (f *decompressor) copyData() {
+	buf := f.dict.writeSlice()
+	if len(buf) > f.copyLen {
+		buf = buf[:f.copyLen]
+	}
+
+	cnt, err := io.ReadFull(f.r, buf)
+	f.roffset += int64(cnt)
+	f.copyLen -= cnt
+	f.dict.writeMark(cnt)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+
+	if f.dict.availWrite() == 0 || f.copyLen > 0 {
+		f.toRead = f.dict.readFlush()
+		f.step = copyData
+		return
+	}
+	f.finishBlock()
+}
+
+func (f *decompressor) finishBlock() {
+	if f.final {
+		if f.dict.availRead() > 0 {
+			f.toRead = f.dict.readFlush()
+		}
+		f.err = io.EOF
+	}
+	f.step = nextBlock
+}
+
+func (f *decompressor) doStep() {
+	switch f.step {
+	case copyData:
+		f.copyData()
+	case nextBlock:
+		f.nextBlock()
+	case huffmanBytesBuffer:
+		f.huffmanBytesBuffer()
+	case huffmanBytesReader:
+		f.huffmanBytesReader()
+	case huffmanBufioReader:
+		f.huffmanBufioReader()
+	case huffmanStringsReader:
+		f.huffmanStringsReader()
+	case huffmanGenericReader:
+		f.huffmanGenericReader()
+	default:
+		panic("BUG: unexpected step state")
+	}
+}
+
+// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
+func noEOF(e error) error {
+	if e == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return e
+}
+
+func (f *decompressor) moreBits() error {
+	c, err := f.r.ReadByte()
+	if err != nil {
+		return noEOF(err)
+	}
+	f.roffset++
+	f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
+	f.nb += 8
+	return nil
+}
+
+// Read the next Huffman-encoded symbol from f according to h.
+func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
+	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+	// with single element, huffSym must error on these two edge cases. In both
+	// cases, the chunks slice will be 0 for the invalid sequence, leading it
+	// satisfy the n == 0 check below.
+	n := uint(h.maxRead)
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	nb, b := f.nb, f.b
+	for {
+		for nb < n {
+			c, err := f.r.ReadByte()
+			if err != nil {
+				f.b = b
+				f.nb = nb
+				return 0, noEOF(err)
+			}
+			f.roffset++
+			b |= uint32(c) << (nb & regSizeMaskUint32)
+			nb += 8
+		}
+		chunk := h.chunks[b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= nb {
+			if n == 0 {
+				f.b = b
+				f.nb = nb
+				if debugDecode {
+					fmt.Println("huffsym: n==0")
+				}
+				f.err = CorruptInputError(f.roffset)
+				return 0, f.err
+			}
+			f.b = b >> (n & regSizeMaskUint32)
+			f.nb = nb - n
+			return int(chunk >> huffmanValueShift), nil
+		}
+	}
+}
+
+func makeReader(r io.Reader) Reader {
+	if rr, ok := r.(Reader); ok {
+		return rr
+	}
+	return bufio.NewReader(r)
+}
+
+func fixedHuffmanDecoderInit() {
+	fixedOnce.Do(func() {
+		// These come from the RFC section 3.2.6.
+		var bits [288]int
+		for i := 0; i < 144; i++ {
+			bits[i] = 8
+		}
+		for i := 144; i < 256; i++ {
+			bits[i] = 9
+		}
+		for i := 256; i < 280; i++ {
+			bits[i] = 7
+		}
+		for i := 280; i < 288; i++ {
+			bits[i] = 8
+		}
+		fixedHuffmanDecoder.init(bits[:])
+	})
+}
+
+func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+	*f = decompressor{
+		r:        makeReader(r),
+		bits:     f.bits,
+		codebits: f.codebits,
+		h1:       f.h1,
+		h2:       f.h2,
+		dict:     f.dict,
+		step:     nextBlock,
+	}
+	f.dict.init(maxMatchOffset, dict)
+	return nil
+}
+
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser
+// when finished reading.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReader(r io.Reader) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = nextBlock
+	f.dict.init(maxMatchOffset, nil)
+	return &f
+}
+
+// NewReaderDict is like NewReader but initializes the reader
+// with a preset dictionary. The returned Reader behaves as if
+// the uncompressed data stream started with the given dictionary,
+// which has already been read. NewReaderDict is typically used
+// to read data compressed by NewWriterDict.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = nextBlock
+	f.dict.init(maxMatchOffset, dict)
+	return &f
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/inflate_gen.go
--- a/gateway/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level1.go
@ -0,0 +1,241 @@
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math/bits"
+)
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL1 struct {
+	fastGen
+	table [tableSize]tableEntry
+}
+
+// EncodeL1 uses a similar algorithm to level 1
+func (e *fastEncL1) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+
+	for {
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, tableBits, hashBytes)
+			candidate = e.table[nextHash]
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+
+			now := load6432(src, nextS)
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+			nextHash = hashLen(now, tableBits, hashBytes)
+
+			offset := s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+
+			// Do one right away...
+			cv = now
+			s = nextS
+			nextS++
+			candidate = e.table[nextHash]
+			now >>= 8
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+
+			offset = s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+			cv = now
+			s = nextS
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset - e.cur
+			var l = int32(4)
+			if false {
+				l = e.matchlenLong(s+4, t+4, src) + 4
+			} else {
+				// inlined:
+				a := src[s+4:]
+				b := src[t+4:]
+				for len(a) >= 8 {
+					if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+						l += int32(bits.TrailingZeros64(diff) >> 3)
+						break
+					}
+					l += 8
+					a = a[8:]
+					b = b[8:]
+				}
+				if len(a) < 8 {
+					b = b[:len(a)]
+					for i := range a {
+						if a[i] != b[i] {
+							break
+						}
+						l++
+					}
+				}
+			}
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			// Save the match found
+			if false {
+				dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			} else {
+				// Inlined...
+				xoffset := uint32(s - t - baseMatchOffset)
+				xlength := l
+				oc := offsetCode(xoffset)
+				xoffset |= oc << 16
+				for xlength > 0 {
+					xl := xlength
+					if xl > 258 {
+						if xl > 258+baseMatchLength {
+							xl = 258
+						} else {
+							xl = 258 - baseMatchLength
+						}
+					}
+					xlength -= xl
+					xl -= baseMatchLength
+					dst.extraHist[lengthCodes1[uint8(xl)]]++
+					dst.offHist[oc]++
+					dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+					dst.n++
+				}
+			}
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+			if s >= sLimit {
+				// Index first pair after match end.
+				if int(s+l+8) < len(src) {
+					cv := load6432(src, s)
+					e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
+				}
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-2)
+			o := e.cur + s - 2
+			prevHash := hashLen(x, tableBits, hashBytes)
+			e.table[prevHash] = tableEntry{offset: o}
+			x >>= 16
+			currHash := hashLen(x, tableBits, hashBytes)
+			candidate = e.table[currHash]
+			e.table[currHash] = tableEntry{offset: o + 2}
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
+				cv = x >> 8
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level2.go
@ -0,0 +1,214 @@
+package flate
+
+import "fmt"
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL2 struct {
+	fastGen
+	table [bTableSize]tableEntry
+}
+
+// EncodeL2 uses a similar algorithm to level 1, but is capable
+// of matching across blocks giving better compression at a small slowdown.
+func (e *fastEncL2) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
+	)
+
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		// When should we start skipping if we haven't found matches in a long while.
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, bTableBits, hashBytes)
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = e.table[nextHash]
+			now := load6432(src, nextS)
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+			nextHash = hashLen(now, bTableBits, hashBytes)
+
+			offset := s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+
+			// Do one right away...
+			cv = now
+			s = nextS
+			nextS++
+			candidate = e.table[nextHash]
+			now >>= 8
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+
+			offset = s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				break
+			}
+			cv = now
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset - e.cur
+			l := e.matchlenLong(s+4, t+4, src) + 4
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+
+			if s >= sLimit {
+				// Index first pair after match end.
+				if int(s+l+8) < len(src) {
+					cv := load6432(src, s)
+					e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
+				}
+				goto emitRemainder
+			}
+
+			// Store every second hash in-between, but offset by 1.
+			for i := s - l + 2; i < s-5; i += 7 {
+				x := load6432(src, i)
+				nextHash := hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i}
+				// Skip one
+				x >>= 16
+				nextHash = hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
+				// Skip one
+				x >>= 16
+				nextHash = hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 to s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-2)
+			o := e.cur + s - 2
+			prevHash := hashLen(x, bTableBits, hashBytes)
+			prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
+			e.table[prevHash] = tableEntry{offset: o}
+			e.table[prevHash2] = tableEntry{offset: o + 1}
+			currHash := hashLen(x>>16, bTableBits, hashBytes)
+			candidate = e.table[currHash]
+			e.table[currHash] = tableEntry{offset: o + 2}
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
+				cv = x >> 24
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level3.go
@ -0,0 +1,241 @@
+package flate
+
+import "fmt"
+
+// fastEncL3
+type fastEncL3 struct {
+	fastGen
+	table [1 << 16]tableEntryPrev
+}
+
+// Encode uses a similar algorithm to level 2, will check up to two candidates.
+func (e *fastEncL3) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		tableBits              = 16
+		tableSize              = 1 << tableBits
+		hashBytes              = 5
+	)
+
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+			}
+			if v.Prev.offset <= minOff {
+				v.Prev.offset = 0
+			} else {
+				v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+			}
+			e.table[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// Skip if too small.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 7
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, tableBits, hashBytes)
+			s = nextS
+			nextS = s + 1 + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidates := e.table[nextHash]
+			now := load6432(src, nextS)
+
+			// Safe offset distance until s + 4...
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			if candidate.offset < minOffset {
+				cv = now
+				// Previous will also be invalid, we have nothing.
+				continue
+			}
+
+			if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
+					break
+				}
+				// Both match and are valid, pick longest.
+				offset := s - (candidate.offset - e.cur)
+				o2 := s - (candidates.Prev.offset - e.cur)
+				l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
+				if l2 > l1 {
+					candidate = candidates.Prev
+				}
+				break
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					break
+				}
+			}
+			cv = now
+		}
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			//
+			t := candidate.offset - e.cur
+			l := e.matchlenLong(s+4, t+4, src) + 4
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+
+			if s >= sLimit {
+				t += l
+				// Index first pair after match end.
+				if int(t+8) < len(src) && t > 0 {
+					cv = load6432(src, t)
+					nextHash := hashLen(cv, tableBits, hashBytes)
+					e.table[nextHash] = tableEntryPrev{
+						Prev: e.table[nextHash].Cur,
+						Cur:  tableEntry{offset: e.cur + t},
+					}
+				}
+				goto emitRemainder
+			}
+
+			// Store every 5th hash in-between.
+			for i := s - l + 2; i < s-5; i += 6 {
+				nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
+				e.table[nextHash] = tableEntryPrev{
+					Prev: e.table[nextHash].Cur,
+					Cur:  tableEntry{offset: e.cur + i}}
+			}
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 to s.
+			x := load6432(src, s-2)
+			prevHash := hashLen(x, tableBits, hashBytes)
+
+			e.table[prevHash] = tableEntryPrev{
+				Prev: e.table[prevHash].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 2},
+			}
+			x >>= 8
+			prevHash = hashLen(x, tableBits, hashBytes)
+
+			e.table[prevHash] = tableEntryPrev{
+				Prev: e.table[prevHash].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 1},
+			}
+			x >>= 8
+			currHash := hashLen(x, tableBits, hashBytes)
+			candidates := e.table[currHash]
+			cv = x
+			e.table[currHash] = tableEntryPrev{
+				Prev: candidates.Cur,
+				Cur:  tableEntry{offset: s + e.cur},
+			}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+
+			if candidate.offset > minOffset {
+				if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					// Found a match...
+					continue
+				}
+				candidate = candidates.Prev
+				if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					// Match at prev...
+					continue
+				}
+			}
+			cv = x >> 8
+			s++
+			break
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/level4.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level4.go
@ -0,0 +1,221 @@
+package flate
+
+import "fmt"
+
+type fastEncL4 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntry
+}
+
+func (e *fastEncL4) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.bTable[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			e.bTable[nextHashL] = entry
+
+			t = lCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
+				// We got a long match. Use that.
+				break
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				lCandidate = e.bTable[hash7(next, tableBits)]
+
+				// If the next long is a candidate, check if we should use that instead...
+				lOff := nextS - (lCandidate.offset - e.cur)
+				if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
+					l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
+					if l2 > l1 {
+						s = nextS
+						t = lCandidate.offset - e.cur
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Extend the 4-byte match as long as possible.
+		l := e.matchlenLong(s+4, t+4, src) + 4
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic("s-t")
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			// Index first pair after match end.
+			if int(s+8) < len(src) {
+				cv := load6432(src, s)
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
+				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
+			}
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between
+		if true {
+			i := nextS
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
+				e.bTable[hash7(cv, tableBits)] = t
+				e.bTable[hash7(cv>>8, tableBits)] = t2
+				e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+
+				i += 3
+				for ; i < s-1; i += 3 {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					e.bTable[hash7(cv, tableBits)] = t
+					e.bTable[hash7(cv>>8, tableBits)] = t2
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		e.bTable[prevHashL] = tableEntry{offset: o}
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level5.go
@ -0,0 +1,708 @@
+package flate
+
+import "fmt"
+
+type fastEncL5 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL5) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+				lCandidate = e.bTable[nextHashL]
+				// Store the next match
+
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// If the next long is a candidate, use that...
+				t2 := lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		if l == 0 {
+			// Extend the 4-byte match as long as possible.
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end of best match...
+		if sAt := s + l; l < 30 && sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+			t2 := eLong - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if t2 >= 0 && off < maxMatchOffset && off > 0 {
+				if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+					t = t2
+					l = l2
+					s = s2
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between.
+		if true {
+			const hashEvery = 3
+			i := s - l + 1
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// Do an long at i+1
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				eLong = &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// We only have enough bits for a short entry at i+2
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+
+				// Skip one - otherwise we risk hitting 's'
+				i += 4
+				for ; i < s-1; i += hashEvery {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					eLong := &e.bTable[hash7(cv, tableBits)]
+					eLong.Cur, eLong.Prev = t, eLong.Cur
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		eLong := &e.bTable[prevHashL]
+		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
+
+// fastEncL5Window is a level 5 encoder,
+// but with a custom window size.
+type fastEncL5Window struct {
+	hist      []byte
+	cur       int32
+	maxOffset int32
+	table     [tableSize]tableEntry
+	bTable    [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	maxMatchOffset := e.maxOffset
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+				lCandidate = e.bTable[nextHashL]
+				// Store the next match
+
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// If the next long is a candidate, use that...
+				t2 := lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		if l == 0 {
+			// Extend the 4-byte match as long as possible.
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end of best match...
+		if sAt := s + l; l < 30 && sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+			t2 := eLong - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if t2 >= 0 && off < maxMatchOffset && off > 0 {
+				if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+					t = t2
+					l = l2
+					s = s2
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between.
+		if true {
+			const hashEvery = 3
+			i := s - l + 1
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// Do an long at i+1
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				eLong = &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// We only have enough bits for a short entry at i+2
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+
+				// Skip one - otherwise we risk hitting 's'
+				i += 4
+				for ; i < s-1; i += hashEvery {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					eLong := &e.bTable[hash7(cv, tableBits)]
+					eLong.Cur, eLong.Prev = t, eLong.Cur
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		eLong := &e.bTable[prevHashL]
+		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
+
+// Reset the encoding table.
+func (e *fastEncL5Window) Reset() {
+	// We keep the same allocs, since we are compressing the same block sizes.
+	if cap(e.hist) < allocHistory {
+		e.hist = make([]byte, 0, allocHistory)
+	}
+
+	// We offset current position so everything will be out of reach.
+	// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+	if e.cur <= int32(bufferReset) {
+		e.cur += e.maxOffset + int32(len(e.hist))
+	}
+	e.hist = e.hist[:0]
+}
+
+func (e *fastEncL5Window) addBlock(src []byte) int32 {
+	// check if we have space already
+	maxMatchOffset := e.maxOffset
+
+	if len(e.hist)+len(src) > cap(e.hist) {
+		if cap(e.hist) == 0 {
+			e.hist = make([]byte, 0, allocHistory)
+		} else {
+			if cap(e.hist) < int(maxMatchOffset*2) {
+				panic("unexpected buffer size")
+			}
+			// Move down
+			offset := int32(len(e.hist)) - maxMatchOffset
+			copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+			e.cur += offset
+			e.hist = e.hist[:maxMatchOffset]
+		}
+	}
+	s := int32(len(e.hist))
+	e.hist = append(e.hist, src...)
+	return s
+}
+
+// matchlen will return the match length between offsets and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
+	if debugDecode {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > e.maxOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	s1 := int(s) + maxMatchLength - 4
+	if s1 > len(src) {
+		s1 = len(src)
+	}
+
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets and t in src.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
+	if debugDeflate {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > e.maxOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/level6.go
@ -0,0 +1,325 @@
+package flate
+
+import "fmt"
+
+type fastEncL6 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL6) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	// Repeat MUST be > 1 and within range
+	repeat := int32(1)
+	for {
+		const skipLog = 7
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			// Calculate hashes of 'next'
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Long candidate matches at least 4 bytes.
+
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					// Check the previous long candidate as well.
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				// Current value did not match, but check if previous long value does.
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+
+				// Look up next long candidate (at nextS)
+				lCandidate = e.bTable[nextHashL]
+
+				// Store the next match
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// Check repeat at s + repOff
+				const repOff = 1
+				t2 := s - repeat + repOff
+				if load3232(src, t2) == uint32(cv>>(8*repOff)) {
+					ml := e.matchlen(s+4+repOff, t2+4, src) + 4
+					if ml > l {
+						t = t2
+						l = ml
+						s += repOff
+						// Not worth checking more.
+						break
+					}
+				}
+
+				// If the next long is a candidate, use that...
+				t2 = lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							// This is ok, but check previous as well.
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Extend the 4-byte match as long as possible.
+		if l == 0 {
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end-of-match...
+		if sAt := s + l; sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
+			// Test current
+			t2 := eLong.Cur.offset - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if off < maxMatchOffset {
+				if off > 0 && t2 >= 0 {
+					if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+						t = t2
+						l = l2
+						s = s2
+					}
+				}
+				// Test next:
+				t2 = eLong.Prev.offset - e.cur - l + skipBeginning
+				off := s2 - t2
+				if off > 0 && off < maxMatchOffset && t2 >= 0 {
+					if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+						t = t2
+						l = l2
+						s = s2
+					}
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if false {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		repeat = s - t
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			// Index after match end.
+			for i := nextS + 1; i < int32(len(src))-8; i += 2 {
+				cv := load6432(src, i)
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
+			}
+			goto emitRemainder
+		}
+
+		// Store every long hash in-between and every second short.
+		if true {
+			for i := nextS + 1; i < s-1; i += 2 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+				eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		cv = load6432(src, s)
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+//	len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
--- a/gateway/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
+++ b/gateway/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+	MOVQ a_base+0(FP), AX
+	MOVQ b_base+24(FP), CX
+	MOVQ a_len+8(FP), DX
+
+	// matchLen
+	XORL SI, SI
+	CMPL DX, $0x08
+	JB   matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+	MOVQ  (AX)(SI*1), BX
+	XORQ  (CX)(SI*1), BX
+	TESTQ BX, BX
+	JZ    matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+	TZCNTQ BX, BX
+#else
+	BSFQ BX, BX
+#endif
+	SARQ $0x03, BX
+	LEAL (SI)(BX*1), SI
+	JMP  gen_match_len_end
+
+matchlen_loop_standalone:
+	LEAL -8(DX), DX
+	LEAL 8(SI), SI
+	CMPL DX, $0x08
+	JAE  matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+	CMPL DX, $0x04
+	JB   matchlen_match2_standalone
+	MOVL (AX)(SI*1), BX
+	CMPL (CX)(SI*1), BX
+	JNE  matchlen_match2_standalone
+	LEAL -4(DX), DX
+	LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+	CMPL DX, $0x02
+	JB   matchlen_match1_standalone
+	MOVW (AX)(SI*1), BX
+	CMPW (CX)(SI*1), BX
+	JNE  matchlen_match1_standalone
+	LEAL -2(DX), DX
+	LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+	CMPL DX, $0x01
+	JB   gen_match_len_end
+	MOVB (AX)(SI*1), BL
+	CMPB (CX)(SI*1), BL
+	JNE  gen_match_len_end
+	INCL SI
+
+gen_match_len_end:
+	MOVQ SI, ret+48(FP)
+	RET
--- a/gateway/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shortest of the two.
+func matchLen(a, b []byte) (n int) {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
+		}
+		n += 8
+	}
+
+	for i := range a {
+		if a[i] != b[i] {
+			break
+		}
+		n++
+	}
+	return n
+
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
@ -0,0 +1,37 @@
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 7
+	reg8SizeMask16 = 15
+	reg8SizeMask32 = 31
+	reg8SizeMask64 = 63
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = reg8SizeMask8
+	reg16SizeMask16 = reg8SizeMask16
+	reg16SizeMask32 = reg8SizeMask32
+	reg16SizeMask64 = reg8SizeMask64
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = reg8SizeMask8
+	reg32SizeMask16 = reg8SizeMask16
+	reg32SizeMask32 = reg8SizeMask32
+	reg32SizeMask64 = reg8SizeMask64
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = reg8SizeMask8
+	reg64SizeMask16 = reg8SizeMask16
+	reg64SizeMask32 = reg8SizeMask32
+	reg64SizeMask64 = reg8SizeMask64
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = reg8SizeMask8
+	regSizeMaskUint16 = reg8SizeMask16
+	regSizeMaskUint32 = reg8SizeMask32
+	regSizeMaskUint64 = reg8SizeMask64
+)
--- a/gateway/vendor/github.com/klauspost/compress/flate/regmask_other.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/regmask_other.go
@ -0,0 +1,40 @@
+//go:build !amd64
+// +build !amd64
+
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 0xff
+	reg8SizeMask16 = 0xff
+	reg8SizeMask32 = 0xff
+	reg8SizeMask64 = 0xff
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = 0xffff
+	reg16SizeMask16 = 0xffff
+	reg16SizeMask32 = 0xffff
+	reg16SizeMask64 = 0xffff
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = 0xffffffff
+	reg32SizeMask16 = 0xffffffff
+	reg32SizeMask32 = 0xffffffff
+	reg32SizeMask64 = 0xffffffff
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = 0xffffffffffffffff
+	reg64SizeMask16 = 0xffffffffffffffff
+	reg64SizeMask32 = 0xffffffffffffffff
+	reg64SizeMask64 = 0xffffffffffffffff
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = ^uint(0)
+	regSizeMaskUint16 = ^uint(0)
+	regSizeMaskUint32 = ^uint(0)
+	regSizeMaskUint64 = ^uint(0)
+)
--- a/gateway/vendor/github.com/klauspost/compress/flate/stateless.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/stateless.go
@ -0,0 +1,318 @@
+package flate
+
+import (
+	"io"
+	"math"
+	"sync"
+)
+
+const (
+	maxStatelessBlock = math.MaxInt16
+	// dictionary will be taken from maxStatelessBlock, so limit it.
+	maxStatelessDict = 8 << 10
+
+	slTableBits  = 13
+	slTableSize  = 1 << slTableBits
+	slTableShift = 32 - slTableBits
+)
+
+type statelessWriter struct {
+	dst    io.Writer
+	closed bool
+}
+
+func (s *statelessWriter) Close() error {
+	if s.closed {
+		return nil
+	}
+	s.closed = true
+	// Emit EOF block
+	return StatelessDeflate(s.dst, nil, true, nil)
+}
+
+func (s *statelessWriter) Write(p []byte) (n int, err error) {
+	err = StatelessDeflate(s.dst, p, false, nil)
+	if err != nil {
+		return 0, err
+	}
+	return len(p), nil
+}
+
+func (s *statelessWriter) Reset(w io.Writer) {
+	s.dst = w
+	s.closed = false
+}
+
+// NewStatelessWriter will do compression but without maintaining any state
+// between Write calls.
+// There will be no memory kept between Write calls,
+// but compression and speed will be suboptimal.
+// Because of this, the size of actual Write calls will affect output size.
+func NewStatelessWriter(dst io.Writer) io.WriteCloser {
+	return &statelessWriter{dst: dst}
+}
+
+// bitWriterPool contains bit writers that can be reused.
+var bitWriterPool = sync.Pool{
+	New: func() interface{} {
+		return newHuffmanBitWriter(nil)
+	},
+}
+
+// StatelessDeflate allows compressing directly to a Writer without retaining state.
+// When returning everything will be flushed.
+// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
+// Longer dictionaries will be truncated and will still produce valid output.
+// Sending nil dictionary is perfectly fine.
+func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
+	var dst tokens
+	bw := bitWriterPool.Get().(*huffmanBitWriter)
+	bw.reset(out)
+	defer func() {
+		// don't keep a reference to our output
+		bw.reset(nil)
+		bitWriterPool.Put(bw)
+	}()
+	if eof && len(in) == 0 {
+		// Just write an EOF block.
+		// Could be faster...
+		bw.writeStoredHeader(0, true)
+		bw.flush()
+		return bw.err
+	}
+
+	// Truncate dict
+	if len(dict) > maxStatelessDict {
+		dict = dict[len(dict)-maxStatelessDict:]
+	}
+
+	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+	var inDict []byte
+
+	for len(in) > 0 {
+		todo := in
+		if len(inDict) > 0 {
+			if len(todo) > maxStatelessBlock-maxStatelessDict {
+				todo = todo[:maxStatelessBlock-maxStatelessDict]
+			}
+		} else if len(todo) > maxStatelessBlock-len(dict) {
+			todo = todo[:maxStatelessBlock-len(dict)]
+		}
+		inOrg := in
+		in = in[len(todo):]
+		uncompressed := todo
+		if len(dict) > 0 {
+			// combine dict and source
+			bufLen := len(todo) + len(dict)
+			combined := make([]byte, bufLen)
+			copy(combined, dict)
+			copy(combined[len(dict):], todo)
+			todo = combined
+		}
+		// Compress
+		if len(inDict) == 0 {
+			statelessEnc(&dst, todo, int16(len(dict)))
+		} else {
+			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+		}
+		isEof := eof && len(in) == 0
+
+		if dst.n == 0 {
+			bw.writeStoredHeader(len(uncompressed), isEof)
+			if bw.err != nil {
+				return bw.err
+			}
+			bw.writeBytes(uncompressed)
+		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
+			// If we removed less than 1/16th, huffman compress the block.
+			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
+		} else {
+			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
+		}
+		if len(in) > 0 {
+			// Retain a dict if we have more
+			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+			dict = nil
+			dst.Reset()
+		}
+		if bw.err != nil {
+			return bw.err
+		}
+	}
+	if !eof {
+		// Align, only a stored block can do that.
+		bw.writeStoredHeader(0, false)
+	}
+	bw.flush()
+	return bw.err
+}
+
+func hashSL(u uint32) uint32 {
+	return (u * 0x1e35a7bd) >> slTableShift
+}
+
+func load3216(b []byte, i int16) uint32 {
+	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+	b = b[i:]
+	b = b[:4]
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load6416(b []byte, i int16) uint64 {
+	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+	b = b[i:]
+	b = b[:8]
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func statelessEnc(dst *tokens, src []byte, startAt int16) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+	)
+
+	type tableEntry struct {
+		offset int16
+	}
+
+	var table [slTableSize]tableEntry
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src)-int(startAt) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = 0
+		return
+	}
+	// Index until startAt
+	if startAt > 0 {
+		cv := load3232(src, 0)
+		for i := int16(0); i < startAt; i++ {
+			table[hashSL(cv)] = tableEntry{offset: i}
+			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
+		}
+	}
+
+	s := startAt + 1
+	nextEmit := startAt
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int16(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load3216(src, s)
+
+	for {
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashSL(cv)
+			candidate = table[nextHash]
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit || nextS <= 0 {
+				goto emitRemainder
+			}
+
+			now := load6416(src, nextS)
+			table[nextHash] = tableEntry{offset: s}
+			nextHash = hashSL(uint32(now))
+
+			if cv == load3216(src, candidate.offset) {
+				table[nextHash] = tableEntry{offset: nextS}
+				break
+			}
+
+			// Do one right away...
+			cv = uint32(now)
+			s = nextS
+			nextS++
+			candidate = table[nextHash]
+			now >>= 8
+			table[nextHash] = tableEntry{offset: s}
+
+			if cv == load3216(src, candidate.offset) {
+				table[nextHash] = tableEntry{offset: nextS}
+				break
+			}
+			cv = uint32(now)
+			s = nextS
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset
+			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			// Save the match found
+			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6416(src, s-2)
+			o := s - 2
+			prevHash := hashSL(uint32(x))
+			table[prevHash] = tableEntry{offset: o}
+			x >>= 16
+			currHash := hashSL(uint32(x))
+			candidate = table[currHash]
+			table[currHash] = tableEntry{offset: o + 2}
+
+			if uint32(x) != load3216(src, candidate.offset) {
+				cv = uint32(x >> 8)
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
--- a/gateway/vendor/github.com/klauspost/compress/flate/token.go
+++ b/gateway/vendor/github.com/klauspost/compress/flate/token.go
@ -0,0 +1,379 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+)
+
+const (
+	// bits 0-16  	xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
+	// bits 16-22	offsetcode - 5 bits
+	// bits 22-30   xlength = length - MIN_MATCH_LENGTH - 8 bits
+	// bits 30-32   type   0 = literal  1=EOF  2=Match   3=Unused - 2 bits
+	lengthShift         = 22
+	offsetMask          = 1<<lengthShift - 1
+	typeMask            = 3 << 30
+	literalType         = 0 << 30
+	matchType           = 1 << 30
+	matchOffsetOnlyMask = 0xffff
+)
+
+// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
+// is lengthCodes[length - MIN_MATCH_LENGTH]
+var lengthCodes = [256]uint8{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
+	15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+	17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
+	18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+	19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
+	20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 28,
+}
+
+// lengthCodes1 is length codes, but starting at 1.
+var lengthCodes1 = [256]uint8{
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
+	10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
+	14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
+	16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
+	18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
+	19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
+	20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
+	23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 29,
+}
+
+var offsetCodes = [256]uint32{
+	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+}
+
+// offsetCodes14 are offsetCodes, but with 14 added.
+var offsetCodes14 = [256]uint32{
+	14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+}
+
+type token uint32
+
+type tokens struct {
+	extraHist [32]uint16  // codes 256->maxnumlit
+	offHist   [32]uint16  // offset codes
+	litHist   [256]uint16 // codes 0->255
+	nFilled   int
+	n         uint16 // Must be able to contain maxStoreBlockSize
+	tokens    [maxStoreBlockSize + 1]token
+}
+
+func (t *tokens) Reset() {
+	if t.n == 0 {
+		return
+	}
+	t.n = 0
+	t.nFilled = 0
+	for i := range t.litHist[:] {
+		t.litHist[i] = 0
+	}
+	for i := range t.extraHist[:] {
+		t.extraHist[i] = 0
+	}
+	for i := range t.offHist[:] {
+		t.offHist[i] = 0
+	}
+}
+
+func (t *tokens) Fill() {
+	if t.n == 0 {
+		return
+	}
+	for i, v := range t.litHist[:] {
+		if v == 0 {
+			t.litHist[i] = 1
+			t.nFilled++
+		}
+	}
+	for i, v := range t.extraHist[:literalCount-256] {
+		if v == 0 {
+			t.nFilled++
+			t.extraHist[i] = 1
+		}
+	}
+	for i, v := range t.offHist[:offsetCodeCount] {
+		if v == 0 {
+			t.offHist[i] = 1
+		}
+	}
+}
+
+func indexTokens(in []token) tokens {
+	var t tokens
+	t.indexTokens(in)
+	return t
+}
+
+func (t *tokens) indexTokens(in []token) {
+	t.Reset()
+	for _, tok := range in {
+		if tok < matchType {
+			t.AddLiteral(tok.literal())
+			continue
+		}
+		t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
+	}
+}
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+func emitLiteral(dst *tokens, lit []byte) {
+	for _, v := range lit {
+		dst.tokens[dst.n] = token(v)
+		dst.litHist[v]++
+		dst.n++
+	}
+}
+
+func (t *tokens) AddLiteral(lit byte) {
+	t.tokens[t.n] = token(lit)
+	t.litHist[lit]++
+	t.n++
+}
+
+// from https://stackoverflow.com/a/28730362
+func mFastLog2(val float32) float32 {
+	ux := int32(math.Float32bits(val))
+	log2 := (float32)(((ux >> 23) & 255) - 128)
+	ux &= -0x7f800001
+	ux += 127 << 23
+	uval := math.Float32frombits(uint32(ux))
+	log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
+	return log2
+}
+
+// EstimatedBits will return an minimum size estimated by an *optimal*
+// compression of the block.
+// The size of the block
+func (t *tokens) EstimatedBits() int {
+	shannon := float32(0)
+	bits := int(0)
+	nMatches := 0
+	total := int(t.n) + t.nFilled
+	if total > 0 {
+		invTotal := 1.0 / float32(total)
+		for _, v := range t.litHist[:] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+			}
+		}
+		// Just add 15 for EOB
+		shannon += 15
+		for i, v := range t.extraHist[1 : literalCount-256] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+				bits += int(lengthExtraBits[i&31]) * int(v)
+				nMatches += int(v)
+			}
+		}
+	}
+	if nMatches > 0 {
+		invTotal := 1.0 / float32(nMatches)
+		for i, v := range t.offHist[:offsetCodeCount] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+				bits += int(offsetExtraBits[i&31]) * int(v)
+			}
+		}
+	}
+	return int(shannon) + bits
+}
+
+// AddMatch adds a match to the tokens.
+// This function is very sensitive to inlining and right on the border.
+func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
+	if debugDeflate {
+		if xlength >= maxMatchLength+baseMatchLength {
+			panic(fmt.Errorf("invalid length: %v", xlength))
+		}
+		if xoffset >= maxMatchOffset+baseMatchOffset {
+			panic(fmt.Errorf("invalid offset: %v", xoffset))
+		}
+	}
+	oCode := offsetCode(xoffset)
+	xoffset |= oCode << 16
+
+	t.extraHist[lengthCodes1[uint8(xlength)]]++
+	t.offHist[oCode&31]++
+	t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
+	t.n++
+}
+
+// AddMatchLong adds a match to the tokens, potentially longer than max match length.
+// Length should NOT have the base subtracted, only offset should.
+func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
+	if debugDeflate {
+		if xoffset >= maxMatchOffset+baseMatchOffset {
+			panic(fmt.Errorf("invalid offset: %v", xoffset))
+		}
+	}
+	oc := offsetCode(xoffset)
+	xoffset |= oc << 16
+	for xlength > 0 {
+		xl := xlength
+		if xl > 258 {
+			// We need to have at least baseMatchLength left over for next loop.
+			if xl > 258+baseMatchLength {
+				xl = 258
+			} else {
+				xl = 258 - baseMatchLength
+			}
+		}
+		xlength -= xl
+		xl -= baseMatchLength
+		t.extraHist[lengthCodes1[uint8(xl)]]++
+		t.offHist[oc&31]++
+		t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+		t.n++
+	}
+}
+
+func (t *tokens) AddEOB() {
+	t.tokens[t.n] = token(endBlockMarker)
+	t.extraHist[0]++
+	t.n++
+}
+
+func (t *tokens) Slice() []token {
+	return t.tokens[:t.n]
+}
+
+// VarInt returns the tokens as varint encoded bytes.
+func (t *tokens) VarInt() []byte {
+	var b = make([]byte, binary.MaxVarintLen32*int(t.n))
+	var off int
+	for _, v := range t.tokens[:t.n] {
+		off += binary.PutUvarint(b[off:], uint64(v))
+	}
+	return b[:off]
+}
+
+// FromVarInt restores t to the varint encoded tokens provided.
+// Any data in t is removed.
+func (t *tokens) FromVarInt(b []byte) error {
+	var buf = bytes.NewReader(b)
+	var toks []token
+	for {
+		r, err := binary.ReadUvarint(buf)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		toks = append(toks, token(r))
+	}
+	t.indexTokens(toks)
+	return nil
+}
+
+// Returns the type of a token
+func (t token) typ() uint32 { return uint32(t) & typeMask }
+
+// Returns the literal of a literal token
+func (t token) literal() uint8 { return uint8(t) }
+
+// Returns the extra offset of a match token
+func (t token) offset() uint32 { return uint32(t) & offsetMask }
+
+func (t token) length() uint8 { return uint8(t >> lengthShift) }
+
+// Convert length to code.
+func lengthCode(len uint8) uint8 { return lengthCodes[len] }
+
+// Returns the offset code corresponding to a specific offset
+func offsetCode(off uint32) uint32 {
+	if false {
+		if off < uint32(len(offsetCodes)) {
+			return offsetCodes[off&255]
+		} else if off>>7 < uint32(len(offsetCodes)) {
+			return offsetCodes[(off>>7)&255] + 14
+		} else {
+			return offsetCodes[(off>>14)&255] + 28
+		}
+	}
+	if off < uint32(len(offsetCodes)) {
+		return offsetCodes[uint8(off)]
+	}
+	return offsetCodes14[uint8(off>>7)]
+}