Rename Makefile targets

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alex@openfaas.com>
This commit is contained in:
Alex Ellis (OpenFaaS Ltd)
2023-10-23 11:29:19 +01:00
parent 479285caf6
commit 9ba4a73d5d
350 changed files with 22981 additions and 3972 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,184 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// - Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// - Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps-around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy is placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied is
// possibly dependent on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
loop:
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
if dstPos < endPos {
goto loop // Avoid for-loop so that this function can be inlined
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}

View File

@ -0,0 +1,193 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"fmt"
)
type fastEnc interface {
Encode(dst *tokens, src []byte)
Reset()
}
func newFastEnc(level int) fastEnc {
switch level {
case 1:
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
case 2:
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
case 3:
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
case 4:
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
case 5:
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
case 6:
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
default:
panic("invalid level specified")
}
}
const (
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
baseMatchOffset = 1 // The smallest match offset
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
maxMatchOffset = 1 << 15 // The largest match offset
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
const (
prime3bytes = 506832829
prime4bytes = 2654435761
prime5bytes = 889523592379
prime6bytes = 227718039650203
prime7bytes = 58295818150454627
prime8bytes = 0xcf1bbcdcb7a56463
)
func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
type tableEntry struct {
offset int32
}
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastGen struct {
hist []byte
cur int32
}
func (e *fastGen) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < maxMatchOffset*2 {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
// copy(e.hist[0:maxMatchOffset], e.hist[offset:])
*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}
// hashLen returns a hash of the lowest mls bytes of with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return uint32((u * prime8bytes) >> (64 - length))
default:
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
func (e *fastGen) Reset() {
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,417 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
)
const (
maxBitsLimit = 16
// number of valid literals
literalCount = 286
)
// hcode is a huffman code with a bit code and bit length.
type hcode uint32
func (h hcode) len() uint8 {
return uint8(h)
}
func (h hcode) code64() uint64 {
return uint64(h >> 8)
}
func (h hcode) zero() bool {
return h == 0
}
type huffmanEncoder struct {
codes []hcode
bitCount [17]int32
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
freqcache [literalCount + 1]literalNode
}
type literalNode struct {
literal uint16
freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level. for better printing
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint8) {
*h = hcode(length) | (hcode(code) << 8)
}
func newhcode(code uint16, length uint8) hcode {
return hcode(length) | (hcode(code) << 8)
}
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << ((16 - bitLength) & 15))
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Make capacity to next power of two.
c := uint(bits.Len32(uint32(size - 1)))
return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
}
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint8
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = newhcode(reverseBits(bits, size), size)
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
}
return h
}
var fixedLiteralEncoding = generateFixedLiteralEncoding()
var fixedOffsetEncoding = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len())
}
}
return total
}
func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
var total int
for _, f := range b {
total += int(h.codes[f].len())
}
return total
}
// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
code := h.codes[i]
if code.zero() {
return math.MaxInt32
}
total += int(f) * int(code.len())
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
//
// and their associated frequencies. The array is in order of increasing
// frequency, and has as its last element a special element with frequency
// MaxInt32
//
// maxBits The maximum number of bits that should be used to encode any literal.
//
// Must be less than 16.
//
// return An integer array in which array[i] indicates the number of literals
//
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
// Descending to only have 1 bounds check.
l2f := int32(list[2].freq)
l1f := int32(list[1].freq)
l0f := int32(list[0].freq) + int32(list[1].freq)
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: l1f,
nextCharFreq: l2f,
nextPairFreq: l0f,
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := uint32(maxBits)
for level < 16 {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
e := list[n]
if e.literal < math.MaxUint16 {
l.nextCharFreq = int32(e.freq)
} else {
l.nextCharFreq = math.MaxInt32
}
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
if true {
save := leafCounts[level][level]
leafCounts[level] = leafCounts[level-1]
leafCounts[level][level] = save
} else {
copy(leafCounts[level][:level], leafCounts[level-1][:level])
}
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Somethings is wrong if at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-bits]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list := h.freqcache[:len(freq)+1]
codes := h.codes[:len(freq)]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
codes[i] = 0
}
}
list[count] = literalNode{}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
sortByFreq(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
// atLeastOne clamps the result between 1 and 15.
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
if v > 15 {
return 15
}
return v
}
func histogram(b []byte, h []uint16) {
if true && len(b) >= 8<<10 {
// Split for bigger inputs
histogramSplit(b, h)
} else {
h = h[:256]
for _, t := range b {
h[t]++
}
}
}
func histogramSplit(b []byte, h []uint16) {
// Tested, and slightly faster than 2-way.
// Writing to separate arrays and combining is also slightly slower.
h = h[:256]
for len(b)&3 != 0 {
h[b[0]]++
b = b[1:]
}
n := len(b) / 4
x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
for i, t := range x {
v0 := &h[t]
v1 := &h[y[i]]
v3 := &h[w[i]]
v2 := &h[z[i]]
*v0++
*v1++
*v2++
*v3++
}
}

View File

@ -0,0 +1,159 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByFreq(data []literalNode) {
n := len(data)
quickSortByFreq(data, 0, n, maxDepth(n))
}
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivotByFreq(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSortByFreq(data, a, mlo, maxDepth)
a = mhi // i.e., quickSortByFreq(data, mhi, b)
} else {
quickSortByFreq(data, mhi, b, maxDepth)
b = mlo // i.e., quickSortByFreq(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSortByFreq(data, a, b)
}
}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
medianOfThreeSortByFreq(data, m, m-s, m+s)
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThreeSortByFreq(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
}
b := a
for {
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
}
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
}
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSortByFreq(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// quickSortByFreq, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View File

@ -0,0 +1,201 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByLiteral(data []literalNode) {
n := len(data)
quickSort(data, 0, n, maxDepth(n))
}
func quickSort(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].literal < data[i-6].literal {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSort(data, a, b)
}
}
func heapSort(data []literalNode, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDown(data, lo, i, first)
}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
child++
}
if data[first+root].literal > data[first+child].literal {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data[a].literal < data[pivot].literal; a++ {
}
b := a
for {
for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
}
for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].literal > data[pivot].literal { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
}
for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSort(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
var depth int
for i := n; i > 0; i >>= 1 {
depth++
}
return depth * 2
}
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].literal < data[m1].literal {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View File

@ -0,0 +1,829 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"compress/flate"
"fmt"
"io"
"math/bits"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
maxCodeLenMask = 15 // mask for max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
debugDecode = false
)
// Value of length - 3 and extra bits.
type lengthExtra struct {
length, extra uint8
}
var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
var bitMask32 = [32]uint32{
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
} // up to 32 bits
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError = flate.CorruptInputError
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError = flate.ReadError
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError = flate.WriteError
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// http://www.gzip.org/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
maxRead int // the maximum number of bits we can read and not overread
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.chunks == nil {
h.chunks = new([huffmanNumChunks]uint16)
}
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
// Count number of codes of each length,
// compute maxRead and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n&maxCodeLenMask]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i&maxCodeLenMask] = code
code += count[i&maxCodeLenMask]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
if debugDecode {
fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
}
return false
}
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
}
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
if cap(h.links) < huffmanNumChunks-link {
h.links = make([][]uint16, huffmanNumChunks-link)
} else {
h.links = h.links[:huffmanNumChunks-link]
}
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
h.links[off] = h.links[off][:numLinks]
}
}
} else {
h.links = h.links[:0]
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint16(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
type step uint8
const (
copyData step = iota + 1
nextBlock
huffmanBytesBuffer
huffmanBytesReader
huffmanBufioReader
huffmanStringsReader
huffmanGenericReader
)
// Decompress state.
type decompressor struct {
// Input source.
r Reader
roffset int64
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Next step in the decompression,
// and decompression state.
step step
stepState int
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Input bits, in top of b.
b uint32
nb uint
final bool
}
func (f *decompressor) nextBlock() {
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
if debugDecode {
fmt.Println("stored block")
}
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
default:
// 3 is reserved.
if debugDecode {
fmt.Println("reserved data block encountered")
}
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.doStep()
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// WriteTo implements the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
for {
if len(f.toRead) > 0 {
n, err := w.Write(f.toRead)
total += int64(n)
if err != nil {
f.err = err
return total, err
}
if n != len(f.toRead) {
return total, io.ErrShortWrite
}
f.toRead = f.toRead[:0]
}
if f.err != nil && flushed {
if f.err == io.EOF {
return total, nil
}
return total, f.err
}
if f.err == nil {
f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
flushed = true
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
if debugDecode {
fmt.Println("nlit > maxNumLit", nlit)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
if debugDecode {
fmt.Println("ndist > maxNumDist", ndist)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
if debugDecode {
fmt.Println("init codebits failed")
}
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
if debugDecode {
fmt.Println("i==0")
}
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits:", err)
}
return err
}
}
rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
f.b >>= nb & regSizeMaskUint32
f.nb -= nb
if i+rep > n {
if debugDecode {
fmt.Println("i+rep > n", i, rep, n)
}
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
if debugDecode {
fmt.Println("init2 failed")
}
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the maxRead bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.maxRead < f.bits[endBlockMarker] {
f.h1.maxRead = f.bits[endBlockMarker]
}
if !f.final {
// If not the final block, the smallest block possible is
// a predefined table, BTYPE=01, with a single EOB marker.
// This will take up 3 + 7 bits.
f.h1.maxRead += 10
}
return nil
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
left := (f.nb) & 7
f.nb -= left
f.b >>= left
offBytes := f.nb >> 3
// Unfilled values will be overwritten.
f.buf[0] = uint8(f.b)
f.buf[1] = uint8(f.b >> 8)
f.buf[2] = uint8(f.b >> 16)
f.buf[3] = uint8(f.b >> 24)
f.roffset += int64(offBytes)
f.nb, f.b = 0, 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
if nn != ^n {
if debugDecode {
ncomp := ^n
fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
}
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = int(n)
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = nextBlock
}
func (f *decompressor) doStep() {
switch f.step {
case copyData:
f.copyData()
case nextBlock:
f.nextBlock()
case huffmanBytesBuffer:
f.huffmanBytesBuffer()
case huffmanBytesReader:
f.huffmanBytesReader()
case huffmanBufioReader:
f.huffmanBufioReader()
case huffmanStringsReader:
f.huffmanStringsReader()
case huffmanGenericReader:
f.huffmanGenericReader()
default:
panic("BUG: unexpected step state")
}
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & regSizeMaskUint32)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & regSizeMaskUint32)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
func makeReader(r io.Reader) Reader {
if rr, ok := r.(Reader); ok {
return rr
}
return bufio.NewReader(r)
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
r: makeReader(r),
bits: f.bits,
codebits: f.codebits,
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,241 @@
package flate
import (
"encoding/binary"
"fmt"
"math/bits"
)
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL1 struct {
fastGen
table [tableSize]tableEntry
}
// EncodeL1 uses a similar algorithm to level 1
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hashLen(now, tableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = now
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
var l = int32(4)
if false {
l = e.matchlenLong(s+4, t+4, src) + 4
} else {
// inlined:
a := src[s+4:]
b := src[t+4:]
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
l += int32(bits.TrailingZeros64(diff) >> 3)
break
}
l += 8
a = a[8:]
b = b[8:]
}
if len(a) < 8 {
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
break
}
l++
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
// Save the match found
if false {
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
} else {
// Inlined...
xoffset := uint32(s - t - baseMatchOffset)
xlength := l
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
if xl > 258+baseMatchLength {
xl = 258
} else {
xl = 258 - baseMatchLength
}
}
xlength -= xl
xl -= baseMatchLength
dst.extraHist[lengthCodes1[uint8(xl)]]++
dst.offHist[oc]++
dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
dst.n++
}
}
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = x >> 8
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,214 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
fastGen
table [bTableSize]tableEntry
}
// EncodeL2 uses a similar algorithm to level 1, but is capable
// of matching across blocks giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hashLen(now, bTableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
cv = now
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i)
nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hashLen(x, bTableBits, hashBytes)
prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = x >> 24
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,241 @@
package flate
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
table [1 << 16]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
tableBits = 16
tableSize = 1 << tableBits
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
}
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
e.table[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// Skip if too small.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 7
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, tableBits, hashBytes)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
now := load6432(src, nextS)
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
// Check both candidates
candidate = candidates.Cur
if candidate.offset < minOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
if uint32(cv) == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
offset := s - (candidate.offset - e.cur)
o2 := s - (candidates.Prev.offset - e.cur)
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
}
break
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
}
cv = now
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
t += l
// Index first pair after match end.
if int(t+8) < len(src) && t > 0 {
cv = load6432(src, t)
nextHash := hashLen(cv, tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
}
}
goto emitRemainder
}
// Store every 5th hash in-between.
for i := s - l + 2; i < s-5; i += 6 {
nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s.
x := load6432(src, s-2)
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
prevHash = hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
currHash := hashLen(x, tableBits, hashBytes)
candidates := e.table[currHash]
cv = x
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur},
}
// Check both candidates
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)
if candidate.offset > minOffset {
if uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Found a match...
continue
}
candidate = candidates.Prev
if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Match at prev...
continue
}
}
cv = x >> 8
s++
break
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,221 @@
package flate
import "fmt"
type fastEncL4 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntry
}
func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.bTable[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
e.bTable[nextHashL] = entry
t = lCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
// We got a long match. Use that.
break
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
lCandidate = e.bTable[hash7(next, tableBits)]
// If the next long is a candidate, check if we should use that instead...
lOff := nextS - (lCandidate.offset - e.cur)
if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
if l2 > l1 {
s = nextS
t = lCandidate.offset - e.cur
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic("s-t")
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// Store every 3rd hash in-between
if true {
i := nextS
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
i += 3
for ; i < s-1; i += 3 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o}
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,708 @@
package flate
import "fmt"
type fastEncL5 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// fastEncL5Window is a level 5 encoder,
// but with a custom window size.
type fastEncL5Window struct {
hist []byte
cur int32
maxOffset int32
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
maxMatchOffset := e.maxOffset
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// Reset the encoding table.
func (e *fastEncL5Window) Reset() {
// We keep the same allocs, since we are compressing the same block sizes.
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= int32(bufferReset) {
e.cur += e.maxOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
func (e *fastEncL5Window) addBlock(src []byte) int32 {
// check if we have space already
maxMatchOffset := e.maxOffset
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < int(maxMatchOffset*2) {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}

View File

@ -0,0 +1,325 @@
package flate
import "fmt"
type fastEncL6 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
// Repeat MUST be > 1 and within range
repeat := int32(1)
for {
const skipLog = 7
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Long candidate matches at least 4 bytes.
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
// Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
// Look up next long candidate (at nextS)
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check repeat at s + repOff
const repOff = 1
t2 := s - repeat + repOff
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
if ml > l {
t = t2
l = ml
s += repOff
// Not worth checking more.
break
}
}
// If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
// This is ok, but check previous as well.
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current
t2 := eLong.Cur.offset - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
// Test next:
t2 = eLong.Prev.offset - e.cur - l + skipBeginning
off := s2 - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
repeat = s - t
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
}
goto emitRemainder
}
// Store every long hash in-between and every second short.
if true {
for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
cv = load6432(src, s)
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

View File

@ -0,0 +1,68 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc && !noasm
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

View File

@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}

View File

@ -0,0 +1,37 @@
package flate
const (
// Masks for shifts with register sizes of the shift value.
// This can be used to work around the x86 design of shifting by mod register size.
// It can be used when a variable shift is always smaller than the register size.
// reg8SizeMaskX - shift value is 8 bits, shifted is X
reg8SizeMask8 = 7
reg8SizeMask16 = 15
reg8SizeMask32 = 31
reg8SizeMask64 = 63
// reg16SizeMaskX - shift value is 16 bits, shifted is X
reg16SizeMask8 = reg8SizeMask8
reg16SizeMask16 = reg8SizeMask16
reg16SizeMask32 = reg8SizeMask32
reg16SizeMask64 = reg8SizeMask64
// reg32SizeMaskX - shift value is 32 bits, shifted is X
reg32SizeMask8 = reg8SizeMask8
reg32SizeMask16 = reg8SizeMask16
reg32SizeMask32 = reg8SizeMask32
reg32SizeMask64 = reg8SizeMask64
// reg64SizeMaskX - shift value is 64 bits, shifted is X
reg64SizeMask8 = reg8SizeMask8
reg64SizeMask16 = reg8SizeMask16
reg64SizeMask32 = reg8SizeMask32
reg64SizeMask64 = reg8SizeMask64
// regSizeMaskUintX - shift value is uint, shifted is X
regSizeMaskUint8 = reg8SizeMask8
regSizeMaskUint16 = reg8SizeMask16
regSizeMaskUint32 = reg8SizeMask32
regSizeMaskUint64 = reg8SizeMask64
)

View File

@ -0,0 +1,40 @@
//go:build !amd64
// +build !amd64
package flate
const (
// Masks for shifts with register sizes of the shift value.
// This can be used to work around the x86 design of shifting by mod register size.
// It can be used when a variable shift is always smaller than the register size.
// reg8SizeMaskX - shift value is 8 bits, shifted is X
reg8SizeMask8 = 0xff
reg8SizeMask16 = 0xff
reg8SizeMask32 = 0xff
reg8SizeMask64 = 0xff
// reg16SizeMaskX - shift value is 16 bits, shifted is X
reg16SizeMask8 = 0xffff
reg16SizeMask16 = 0xffff
reg16SizeMask32 = 0xffff
reg16SizeMask64 = 0xffff
// reg32SizeMaskX - shift value is 32 bits, shifted is X
reg32SizeMask8 = 0xffffffff
reg32SizeMask16 = 0xffffffff
reg32SizeMask32 = 0xffffffff
reg32SizeMask64 = 0xffffffff
// reg64SizeMaskX - shift value is 64 bits, shifted is X
reg64SizeMask8 = 0xffffffffffffffff
reg64SizeMask16 = 0xffffffffffffffff
reg64SizeMask32 = 0xffffffffffffffff
reg64SizeMask64 = 0xffffffffffffffff
// regSizeMaskUintX - shift value is uint, shifted is X
regSizeMaskUint8 = ^uint(0)
regSizeMaskUint16 = ^uint(0)
regSizeMaskUint32 = ^uint(0)
regSizeMaskUint64 = ^uint(0)
)

View File

@ -0,0 +1,318 @@
package flate
import (
"io"
"math"
"sync"
)
const (
maxStatelessBlock = math.MaxInt16
// dictionary will be taken from maxStatelessBlock, so limit it.
maxStatelessDict = 8 << 10
slTableBits = 13
slTableSize = 1 << slTableBits
slTableShift = 32 - slTableBits
)
type statelessWriter struct {
dst io.Writer
closed bool
}
func (s *statelessWriter) Close() error {
if s.closed {
return nil
}
s.closed = true
// Emit EOF block
return StatelessDeflate(s.dst, nil, true, nil)
}
func (s *statelessWriter) Write(p []byte) (n int, err error) {
err = StatelessDeflate(s.dst, p, false, nil)
if err != nil {
return 0, err
}
return len(p), nil
}
func (s *statelessWriter) Reset(w io.Writer) {
s.dst = w
s.closed = false
}
// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
return newHuffmanBitWriter(nil)
},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning everything will be flushed.
// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
var dst tokens
bw := bitWriterPool.Get().(*huffmanBitWriter)
bw.reset(out)
defer func() {
// don't keep a reference to our output
bw.reset(nil)
bitWriterPool.Put(bw)
}()
if eof && len(in) == 0 {
// Just write an EOF block.
// Could be faster...
bw.writeStoredHeader(0, true)
bw.flush()
return bw.err
}
// Truncate dict
if len(dict) > maxStatelessDict {
dict = dict[len(dict)-maxStatelessDict:]
}
// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
var inDict []byte
for len(in) > 0 {
todo := in
if len(inDict) > 0 {
if len(todo) > maxStatelessBlock-maxStatelessDict {
todo = todo[:maxStatelessBlock-maxStatelessDict]
}
} else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
inOrg := in
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
// combine dict and source
bufLen := len(todo) + len(dict)
combined := make([]byte, bufLen)
copy(combined, dict)
copy(combined[len(dict):], todo)
todo = combined
}
// Compress
if len(inDict) == 0 {
statelessEnc(&dst, todo, int16(len(dict)))
} else {
statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
}
isEof := eof && len(in) == 0
if dst.n == 0 {
bw.writeStoredHeader(len(uncompressed), isEof)
if bw.err != nil {
return bw.err
}
bw.writeBytes(uncompressed)
} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
// If we removed less than 1/16th, huffman compress the block.
bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
} else {
bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
}
if len(in) > 0 {
// Retain a dict if we have more
inDict = inOrg[len(uncompressed)-maxStatelessDict:]
dict = nil
dst.Reset()
}
if bw.err != nil {
return bw.err
}
}
if !eof {
// Align, only a stored block can do that.
bw.writeStoredHeader(0, false)
}
bw.flush()
return bw.err
}
func hashSL(u uint32) uint32 {
return (u * 0x1e35a7bd) >> slTableShift
}
func load3216(b []byte, i int16) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6416(b []byte, i int16) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
offset int16
}
var table [slTableSize]tableEntry
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src)-int(startAt) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = 0
return
}
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
}
s := startAt + 1
nextEmit := startAt
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int16(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3216(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashSL(cv)
candidate = table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit || nextS <= 0 {
goto emitRemainder
}
now := load6416(src, nextS)
table[nextHash] = tableEntry{offset: s}
nextHash = hashSL(uint32(now))
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = table[nextHash]
now >>= 8
table[nextHash] = tableEntry{offset: s}
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset
l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
// Save the match found
dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6416(src, s-2)
o := s - 2
prevHash := hashSL(uint32(x))
table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashSL(uint32(x))
candidate = table[currHash]
table[currHash] = tableEntry{offset: o + 2}
if uint32(x) != load3216(src, candidate.offset) {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View File

@ -0,0 +1,379 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
)
const (
// bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
// bits 16-22 offsetcode - 5 bits
// bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
// bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
matchOffsetOnlyMask = 0xffff
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [256]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
// lengthCodes1 is length codes, but starting at 1.
var lengthCodes1 = [256]uint8{
1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 29,
}
var offsetCodes = [256]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
// offsetCodes14 are offsetCodes, but with 14 added.
var offsetCodes14 = [256]uint32{
14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
}
type token uint32
type tokens struct {
extraHist [32]uint16 // codes 256->maxnumlit
offHist [32]uint16 // offset codes
litHist [256]uint16 // codes 0->255
nFilled int
n uint16 // Must be able to contain maxStoreBlockSize
tokens [maxStoreBlockSize + 1]token
}
func (t *tokens) Reset() {
if t.n == 0 {
return
}
t.n = 0
t.nFilled = 0
for i := range t.litHist[:] {
t.litHist[i] = 0
}
for i := range t.extraHist[:] {
t.extraHist[i] = 0
}
for i := range t.offHist[:] {
t.offHist[i] = 0
}
}
func (t *tokens) Fill() {
if t.n == 0 {
return
}
for i, v := range t.litHist[:] {
if v == 0 {
t.litHist[i] = 1
t.nFilled++
}
}
for i, v := range t.extraHist[:literalCount-256] {
if v == 0 {
t.nFilled++
t.extraHist[i] = 1
}
}
for i, v := range t.offHist[:offsetCodeCount] {
if v == 0 {
t.offHist[i] = 1
}
}
}
func indexTokens(in []token) tokens {
var t tokens
t.indexTokens(in)
return t
}
func (t *tokens) indexTokens(in []token) {
t.Reset()
for _, tok := range in {
if tok < matchType {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
}
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst *tokens, lit []byte) {
for _, v := range lit {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
func (t *tokens) AddLiteral(lit byte) {
t.tokens[t.n] = token(lit)
t.litHist[lit]++
t.n++
}
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
ux := int32(math.Float32bits(val))
log2 := (float32)(((ux >> 23) & 255) - 128)
ux &= -0x7f800001
ux += 127 << 23
uval := math.Float32frombits(uint32(ux))
log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
return log2
}
// EstimatedBits will return an minimum size estimated by an *optimal*
// compression of the block.
// The size of the block
func (t *tokens) EstimatedBits() int {
shannon := float32(0)
bits := int(0)
nMatches := 0
total := int(t.n) + t.nFilled
if total > 0 {
invTotal := 1.0 / float32(total)
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
// Just add 15 for EOB
shannon += 15
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
return int(shannon) + bits
}
// AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDeflate {
if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength))
}
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oCode := offsetCode(xoffset)
xoffset |= oCode << 16
t.extraHist[lengthCodes1[uint8(xlength)]]++
t.offHist[oCode&31]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.n++
}
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDeflate {
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
if xl > 258+baseMatchLength {
xl = 258
} else {
xl = 258 - baseMatchLength
}
}
xlength -= xl
xl -= baseMatchLength
t.extraHist[lengthCodes1[uint8(xl)]]++
t.offHist[oc&31]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
}
func (t *tokens) AddEOB() {
t.tokens[t.n] = token(endBlockMarker)
t.extraHist[0]++
t.n++
}
func (t *tokens) Slice() []token {
return t.tokens[:t.n]
}
// VarInt returns the tokens as varint encoded bytes.
func (t *tokens) VarInt() []byte {
var b = make([]byte, binary.MaxVarintLen32*int(t.n))
var off int
for _, v := range t.tokens[:t.n] {
off += binary.PutUvarint(b[off:], uint64(v))
}
return b[:off]
}
// FromVarInt restores t to the varint encoded tokens provided.
// Any data in t is removed.
func (t *tokens) FromVarInt(b []byte) error {
var buf = bytes.NewReader(b)
var toks []token
for {
r, err := binary.ReadUvarint(buf)
if err == io.EOF {
break
}
if err != nil {
return err
}
toks = append(toks, token(r))
}
t.indexTokens(toks)
return nil
}
// Returns the type of a token
func (t token) typ() uint32 { return uint32(t) & typeMask }
// Returns the literal of a literal token
func (t token) literal() uint8 { return uint8(t) }
// Returns the extra offset of a match token
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint8 { return uint8(t >> lengthShift) }
// Convert length to code.
func lengthCode(len uint8) uint8 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
if false {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off&255]
} else if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[(off>>7)&255] + 14
} else {
return offsetCodes[(off>>14)&255] + 28
}
}
if off < uint32(len(offsetCodes)) {
return offsetCodes[uint8(off)]
}
return offsetCodes14[uint8(off>>7)]
}