mirror of
https://github.com/openfaas/faasd.git
synced 2025-06-19 04:26:34 +00:00
Migrate to containerd v1.7.0 and update dependencies
* Updates containerd to v1.7.0 and new binary for 32-bit Arm OSes. * Updates Go dependencies - openfaas and external Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
This commit is contained in:
committed by
Alex Ellis
parent
9efd019e86
commit
c41c2cd9fc
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen
|
||||
|
||||
Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
|
||||
|
||||
For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
|
||||
|
||||
## Installation
|
||||
|
||||
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
|
||||
|
23
vendor/github.com/klauspost/compress/zstd/blockdec.go
generated
vendored
23
vendor/github.com/klauspost/compress/zstd/blockdec.go
generated
vendored
@ -9,8 +9,8 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
@ -83,8 +83,9 @@ type blockDec struct {
|
||||
|
||||
err error
|
||||
|
||||
// Check against this crc
|
||||
checkCRC []byte
|
||||
// Check against this crc, if hasCRC is true.
|
||||
checkCRC uint32
|
||||
hasCRC bool
|
||||
|
||||
// Frame to use for singlethreaded decoding.
|
||||
// Should not be used by the decoder itself since parent may be another frame.
|
||||
@ -192,16 +193,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||
}
|
||||
|
||||
// Read block data.
|
||||
if cap(b.dataStorage) < cSize {
|
||||
if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
|
||||
// byteBuf doesn't need a destination buffer.
|
||||
if b.lowMem || cSize > maxCompressedBlockSize {
|
||||
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
|
||||
} else {
|
||||
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
|
||||
}
|
||||
}
|
||||
if cap(b.dst) <= maxSize {
|
||||
b.dst = make([]byte, 0, maxSize+1)
|
||||
}
|
||||
b.data, err = br.readBig(cSize, b.dataStorage)
|
||||
if err != nil {
|
||||
if debugDecoder {
|
||||
@ -210,6 +209,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
if cap(b.dst) <= maxSize {
|
||||
b.dst = make([]byte, 0, maxSize+1)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -233,7 +235,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
|
||||
if b.lowMem {
|
||||
b.dst = make([]byte, b.RLESize)
|
||||
} else {
|
||||
b.dst = make([]byte, maxBlockSize)
|
||||
b.dst = make([]byte, maxCompressedBlockSize)
|
||||
}
|
||||
}
|
||||
b.dst = b.dst[:b.RLESize]
|
||||
@ -441,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||
}
|
||||
}
|
||||
var err error
|
||||
if debugDecoder {
|
||||
println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
|
||||
}
|
||||
huff, literals, err = huff0.ReadTable(literals, huff)
|
||||
if err != nil {
|
||||
println("reading huffman table:", err)
|
||||
@ -651,7 +656,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
|
||||
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
|
||||
buf.Write(in)
|
||||
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
||||
os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
18
vendor/github.com/klauspost/compress/zstd/bytebuf.go
generated
vendored
18
vendor/github.com/klauspost/compress/zstd/bytebuf.go
generated
vendored
@ -7,7 +7,6 @@ package zstd
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
)
|
||||
|
||||
type byteBuffer interface {
|
||||
@ -23,7 +22,7 @@ type byteBuffer interface {
|
||||
readByte() (byte, error)
|
||||
|
||||
// Skip n bytes.
|
||||
skipN(n int) error
|
||||
skipN(n int64) error
|
||||
}
|
||||
|
||||
// in-memory buffer
|
||||
@ -55,16 +54,19 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
|
||||
func (b *byteBuf) readByte() (byte, error) {
|
||||
bb := *b
|
||||
if len(bb) < 1 {
|
||||
return 0, nil
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
r := bb[0]
|
||||
*b = bb[1:]
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (b *byteBuf) skipN(n int) error {
|
||||
func (b *byteBuf) skipN(n int64) error {
|
||||
bb := *b
|
||||
if len(bb) < n {
|
||||
if n < 0 {
|
||||
return fmt.Errorf("negative skip (%d) requested", n)
|
||||
}
|
||||
if int64(len(bb)) < n {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
*b = bb[n:]
|
||||
@ -120,9 +122,9 @@ func (r *readerWrapper) readByte() (byte, error) {
|
||||
return r.tmp[0], nil
|
||||
}
|
||||
|
||||
func (r *readerWrapper) skipN(n int) error {
|
||||
n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
|
||||
if n2 != int64(n) {
|
||||
func (r *readerWrapper) skipN(n int64) error {
|
||||
n2, err := io.CopyN(io.Discard, r.r, n)
|
||||
if n2 != n {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
|
9
vendor/github.com/klauspost/compress/zstd/decodeheader.go
generated
vendored
9
vendor/github.com/klauspost/compress/zstd/decodeheader.go
generated
vendored
@ -4,7 +4,6 @@
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error {
|
||||
}
|
||||
h.HeaderSize += 4
|
||||
b, in := in[:4], in[4:]
|
||||
if !bytes.Equal(b, frameMagic) {
|
||||
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
|
||||
if string(b) != frameMagic {
|
||||
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
|
||||
return ErrMagicMismatch
|
||||
}
|
||||
if len(in) < 4 {
|
||||
@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
}
|
||||
b, in = in[:size], in[size:]
|
||||
h.HeaderSize += int(size)
|
||||
switch size {
|
||||
switch len(b) {
|
||||
case 1:
|
||||
h.DictionaryID = uint32(b[0])
|
||||
case 2:
|
||||
@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
}
|
||||
b, in = in[:fcsSize], in[fcsSize:]
|
||||
h.HeaderSize += int(fcsSize)
|
||||
switch fcsSize {
|
||||
switch len(b) {
|
||||
case 1:
|
||||
h.FrameContentSize = uint64(b[0])
|
||||
case 2:
|
||||
|
140
vendor/github.com/klauspost/compress/zstd/decoder.go
generated
vendored
140
vendor/github.com/klauspost/compress/zstd/decoder.go
generated
vendored
@ -5,7 +5,6 @@
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
@ -35,13 +34,13 @@ type Decoder struct {
|
||||
br readerWrapper
|
||||
enabled bool
|
||||
inFrame bool
|
||||
dstBuf []byte
|
||||
}
|
||||
|
||||
frame *frameDec
|
||||
|
||||
// Custom dictionaries.
|
||||
// Always uses copies.
|
||||
dicts map[uint32]dict
|
||||
dicts map[uint32]*dict
|
||||
|
||||
// streamWg is the waitgroup for all streams
|
||||
streamWg sync.WaitGroup
|
||||
@ -103,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
|
||||
}
|
||||
|
||||
// Transfer option dicts.
|
||||
d.dicts = make(map[uint32]dict, len(d.o.dicts))
|
||||
d.dicts = make(map[uint32]*dict, len(d.o.dicts))
|
||||
for _, dc := range d.o.dicts {
|
||||
d.dicts[dc.id] = dc
|
||||
}
|
||||
@ -187,21 +186,23 @@ func (d *Decoder) Reset(r io.Reader) error {
|
||||
}
|
||||
|
||||
// If bytes buffer and < 5MB, do sync decoding anyway.
|
||||
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
|
||||
if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
|
||||
bb2 := bb
|
||||
if debugDecoder {
|
||||
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
|
||||
}
|
||||
b := bb2.Bytes()
|
||||
var dst []byte
|
||||
if cap(d.current.b) > 0 {
|
||||
dst = d.current.b
|
||||
if cap(d.syncStream.dstBuf) > 0 {
|
||||
dst = d.syncStream.dstBuf[:0]
|
||||
}
|
||||
|
||||
dst, err := d.DecodeAll(b, dst[:0])
|
||||
dst, err := d.DecodeAll(b, dst)
|
||||
if err == nil {
|
||||
err = io.EOF
|
||||
}
|
||||
// Save output buffer
|
||||
d.syncStream.dstBuf = dst
|
||||
d.current.b = dst
|
||||
d.current.err = err
|
||||
d.current.flushed = true
|
||||
@ -216,6 +217,7 @@ func (d *Decoder) Reset(r io.Reader) error {
|
||||
d.current.err = nil
|
||||
d.current.flushed = false
|
||||
d.current.d = nil
|
||||
d.syncStream.dstBuf = nil
|
||||
|
||||
// Ensure no-one else is still running...
|
||||
d.streamWg.Wait()
|
||||
@ -312,6 +314,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
// Grab a block decoder and frame decoder.
|
||||
block := <-d.decoders
|
||||
frame := block.localFrame
|
||||
initialSize := len(dst)
|
||||
defer func() {
|
||||
if debugDecoder {
|
||||
printf("re-adding decoder: %p", block)
|
||||
@ -337,21 +340,26 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
if frame.DictionaryID != nil {
|
||||
dict, ok := d.dicts[*frame.DictionaryID]
|
||||
if !ok {
|
||||
return nil, ErrUnknownDictionary
|
||||
}
|
||||
if debugDecoder {
|
||||
println("setting dict", frame.DictionaryID)
|
||||
}
|
||||
frame.history.setDict(&dict)
|
||||
if err = d.setDict(frame); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if frame.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
|
||||
}
|
||||
return dst, ErrWindowSizeExceeded
|
||||
}
|
||||
if frame.FrameContentSize != fcsUnknown {
|
||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
||||
@ -361,7 +369,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if cap(dst) == 0 {
|
||||
if cap(dst) == 0 && !d.o.limitToCap {
|
||||
// Allocate len(input) * 2 by default if nothing is provided
|
||||
// and we didn't get frame content size.
|
||||
size := len(input) * 2
|
||||
@ -379,6 +387,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
if err != nil {
|
||||
return dst, err
|
||||
}
|
||||
if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if len(frame.bBuf) == 0 {
|
||||
if debugDecoder {
|
||||
println("frame dbuf empty")
|
||||
@ -439,7 +450,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
|
||||
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
|
||||
}
|
||||
|
||||
if !d.o.ignoreChecksum && len(next.b) > 0 {
|
||||
if d.o.ignoreChecksum {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(next.b) > 0 {
|
||||
n, err := d.current.crc.Write(next.b)
|
||||
if err == nil {
|
||||
if n != len(next.b) {
|
||||
@ -447,18 +462,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
|
||||
got := d.current.crc.Sum64()
|
||||
var tmp [4]byte
|
||||
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
|
||||
if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
|
||||
if next.err == nil && next.d != nil && next.d.hasCRC {
|
||||
got := uint32(d.current.crc.Sum64())
|
||||
if got != next.d.checkCRC {
|
||||
if debugDecoder {
|
||||
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
|
||||
printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
|
||||
}
|
||||
d.current.err = ErrCRCMismatch
|
||||
} else {
|
||||
if debugDecoder {
|
||||
println("CRC ok", tmp[:])
|
||||
printf("CRC ok %08x\n", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -474,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
||||
if !d.syncStream.inFrame {
|
||||
d.frame.history.reset()
|
||||
d.current.err = d.frame.reset(&d.syncStream.br)
|
||||
if d.current.err == nil {
|
||||
d.current.err = d.setDict(d.frame)
|
||||
}
|
||||
if d.current.err != nil {
|
||||
return false
|
||||
}
|
||||
if d.frame.DictionaryID != nil {
|
||||
dict, ok := d.dicts[*d.frame.DictionaryID]
|
||||
if !ok {
|
||||
d.current.err = ErrUnknownDictionary
|
||||
return false
|
||||
} else {
|
||||
d.frame.history.setDict(&dict)
|
||||
}
|
||||
}
|
||||
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
|
||||
d.current.err = ErrDecoderSizeExceeded
|
||||
return false
|
||||
@ -664,6 +671,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
|
||||
}
|
||||
hist.reset()
|
||||
hist.decoders = block.async.newHist.decoders
|
||||
hist.recentOffsets = block.async.newHist.recentOffsets
|
||||
hist.windowSize = block.async.newHist.windowSize
|
||||
@ -695,6 +703,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
seqExecute <- block
|
||||
}
|
||||
close(seqExecute)
|
||||
hist.reset()
|
||||
}()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
@ -718,6 +727,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("Async 2: new history")
|
||||
}
|
||||
hist.reset()
|
||||
hist.windowSize = block.async.newHist.windowSize
|
||||
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
|
||||
if block.async.newHist.dict != nil {
|
||||
@ -747,7 +757,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if block.lowMem {
|
||||
block.dst = make([]byte, block.RLESize)
|
||||
} else {
|
||||
block.dst = make([]byte, maxBlockSize)
|
||||
block.dst = make([]byte, maxCompressedBlockSize)
|
||||
}
|
||||
}
|
||||
block.dst = block.dst[:block.RLESize]
|
||||
@ -799,13 +809,14 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("decoder goroutines finished")
|
||||
}
|
||||
hist.reset()
|
||||
}()
|
||||
|
||||
var hist history
|
||||
decodeStream:
|
||||
for {
|
||||
var hist history
|
||||
var hasErr bool
|
||||
|
||||
hist.reset()
|
||||
decodeBlock := func(block *blockDec) {
|
||||
if hasErr {
|
||||
if block != nil {
|
||||
@ -840,15 +851,14 @@ decodeStream:
|
||||
if debugDecoder && err != nil {
|
||||
println("Frame decoder returned", err)
|
||||
}
|
||||
if err == nil && frame.DictionaryID != nil {
|
||||
dict, ok := d.dicts[*frame.DictionaryID]
|
||||
if !ok {
|
||||
err = ErrUnknownDictionary
|
||||
} else {
|
||||
frame.history.setDict(&dict)
|
||||
}
|
||||
if err == nil {
|
||||
err = d.setDict(frame)
|
||||
}
|
||||
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
|
||||
}
|
||||
|
||||
err = ErrDecoderSizeExceeded
|
||||
}
|
||||
if err != nil {
|
||||
@ -890,18 +900,22 @@ decodeStream:
|
||||
println("next block returned error:", err)
|
||||
}
|
||||
dec.err = err
|
||||
dec.checkCRC = nil
|
||||
dec.hasCRC = false
|
||||
if dec.Last && frame.HasCheckSum && err == nil {
|
||||
crc, err := frame.rawInput.readSmall(4)
|
||||
if err != nil {
|
||||
if len(crc) < 4 {
|
||||
if err == nil {
|
||||
err = io.ErrUnexpectedEOF
|
||||
|
||||
}
|
||||
println("CRC missing?", err)
|
||||
dec.err = err
|
||||
}
|
||||
var tmp [4]byte
|
||||
copy(tmp[:], crc)
|
||||
dec.checkCRC = tmp[:]
|
||||
if debugDecoder {
|
||||
println("found crc to check:", dec.checkCRC)
|
||||
} else {
|
||||
dec.checkCRC = binary.LittleEndian.Uint32(crc)
|
||||
dec.hasCRC = true
|
||||
if debugDecoder {
|
||||
printf("found crc to check: %08x\n", dec.checkCRC)
|
||||
}
|
||||
}
|
||||
}
|
||||
err = dec.err
|
||||
@ -917,5 +931,23 @@ decodeStream:
|
||||
}
|
||||
close(seqDecode)
|
||||
wg.Wait()
|
||||
hist.reset()
|
||||
d.frame.history.b = frameHistCache
|
||||
}
|
||||
|
||||
func (d *Decoder) setDict(frame *frameDec) (err error) {
|
||||
dict, ok := d.dicts[frame.DictionaryID]
|
||||
if ok {
|
||||
if debugDecoder {
|
||||
println("setting dict", frame.DictionaryID)
|
||||
}
|
||||
frame.history.setDict(dict)
|
||||
} else if frame.DictionaryID != 0 {
|
||||
// A zero or missing dictionary id is ambiguous:
|
||||
// either dictionary zero, or no dictionary. In particular,
|
||||
// zstd --patch-from uses this id for the source file,
|
||||
// so only return an error if the dictionary id is not zero.
|
||||
err = ErrUnknownDictionary
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
68
vendor/github.com/klauspost/compress/zstd/decoder_options.go
generated
vendored
68
vendor/github.com/klauspost/compress/zstd/decoder_options.go
generated
vendored
@ -6,6 +6,8 @@ package zstd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/bits"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
@ -14,20 +16,23 @@ type DOption func(*decoderOptions) error
|
||||
|
||||
// options retains accumulated state of multiple options.
|
||||
type decoderOptions struct {
|
||||
lowMem bool
|
||||
concurrent int
|
||||
maxDecodedSize uint64
|
||||
maxWindowSize uint64
|
||||
dicts []dict
|
||||
ignoreChecksum bool
|
||||
lowMem bool
|
||||
concurrent int
|
||||
maxDecodedSize uint64
|
||||
maxWindowSize uint64
|
||||
dicts []*dict
|
||||
ignoreChecksum bool
|
||||
limitToCap bool
|
||||
decodeBufsBelow int
|
||||
}
|
||||
|
||||
func (o *decoderOptions) setDefault() {
|
||||
*o = decoderOptions{
|
||||
// use less ram: true for now, but may change.
|
||||
lowMem: true,
|
||||
concurrent: runtime.GOMAXPROCS(0),
|
||||
maxWindowSize: MaxWindowSize,
|
||||
lowMem: true,
|
||||
concurrent: runtime.GOMAXPROCS(0),
|
||||
maxWindowSize: MaxWindowSize,
|
||||
decodeBufsBelow: 128 << 10,
|
||||
}
|
||||
if o.concurrent > 4 {
|
||||
o.concurrent = 4
|
||||
@ -82,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
|
||||
}
|
||||
|
||||
// WithDecoderDicts allows registering one or more dictionaries for the decoder.
|
||||
// If several dictionaries with the same ID is provided the last one will be used.
|
||||
//
|
||||
// Each slice in dict must be in the [dictionary format] produced by
|
||||
// "zstd --train" from the Zstandard reference implementation.
|
||||
//
|
||||
// If several dictionaries with the same ID are provided, the last one will be used.
|
||||
//
|
||||
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
|
||||
func WithDecoderDicts(dicts ...[]byte) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
for _, b := range dicts {
|
||||
@ -90,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.dicts = append(o.dicts, *d)
|
||||
o.dicts = append(o.dicts, d)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
|
||||
// The slice content can be arbitrary data.
|
||||
func WithDecoderDictRaw(id uint32, content []byte) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
|
||||
return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
|
||||
}
|
||||
o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecoderMaxWindow allows setting a maximum window size for decodes.
|
||||
// This allows rejecting packets that will cause big memory usage.
|
||||
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
|
||||
@ -114,6 +137,29 @@ func WithDecoderMaxWindow(size uint64) DOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
|
||||
// or any size set in WithDecoderMaxMemory.
|
||||
// This can be used to limit decoding to a specific maximum output size.
|
||||
// Disabled by default.
|
||||
func WithDecodeAllCapLimit(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.limitToCap = b
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecodeBuffersBelow will fully decode readers that have a
|
||||
// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
|
||||
// This typically uses fewer allocations but will have the full decompressed object in memory.
|
||||
// Note that WithDecodeAllCapLimit will disable this, as will giving a size of 0 or less.
|
||||
// Default is 128KiB.
|
||||
func WithDecodeBuffersBelow(size int) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.decodeBufsBelow = size
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// IgnoreChecksum allows forcibly ignoring checksum checking.
|
||||
func IgnoreChecksum(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
|
51
vendor/github.com/klauspost/compress/zstd/dict.go
generated
vendored
51
vendor/github.com/klauspost/compress/zstd/dict.go
generated
vendored
@ -1,7 +1,6 @@
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
@ -20,7 +19,10 @@ type dict struct {
|
||||
content []byte
|
||||
}
|
||||
|
||||
var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
|
||||
const dictMagic = "\x37\xa4\x30\xec"
|
||||
|
||||
// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
|
||||
const dictMaxLength = 1 << 31
|
||||
|
||||
// ID returns the dictionary id or 0 if d is nil.
|
||||
func (d *dict) ID() uint32 {
|
||||
@ -30,14 +32,38 @@ func (d *dict) ID() uint32 {
|
||||
return d.id
|
||||
}
|
||||
|
||||
// DictContentSize returns the dictionary content size or 0 if d is nil.
|
||||
func (d *dict) DictContentSize() int {
|
||||
// ContentSize returns the dictionary content size or 0 if d is nil.
|
||||
func (d *dict) ContentSize() int {
|
||||
if d == nil {
|
||||
return 0
|
||||
}
|
||||
return len(d.content)
|
||||
}
|
||||
|
||||
// Content returns the dictionary content.
|
||||
func (d *dict) Content() []byte {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
return d.content
|
||||
}
|
||||
|
||||
// Offsets returns the initial offsets.
|
||||
func (d *dict) Offsets() [3]int {
|
||||
if d == nil {
|
||||
return [3]int{}
|
||||
}
|
||||
return d.offsets
|
||||
}
|
||||
|
||||
// LitEncoder returns the literal encoder.
|
||||
func (d *dict) LitEncoder() *huff0.Scratch {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
return d.litEnc
|
||||
}
|
||||
|
||||
// Load a dictionary as described in
|
||||
// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
|
||||
func loadDict(b []byte) (*dict, error) {
|
||||
@ -50,7 +76,7 @@ func loadDict(b []byte) (*dict, error) {
|
||||
ofDec: sequenceDec{fse: &fseDecoder{}},
|
||||
mlDec: sequenceDec{fse: &fseDecoder{}},
|
||||
}
|
||||
if !bytes.Equal(b[:4], dictMagic[:]) {
|
||||
if string(b[:4]) != dictMagic {
|
||||
return nil, ErrMagicMismatch
|
||||
}
|
||||
d.id = binary.LittleEndian.Uint32(b[4:8])
|
||||
@ -62,7 +88,7 @@ func loadDict(b []byte) (*dict, error) {
|
||||
var err error
|
||||
d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("loading literal table: %w", err)
|
||||
}
|
||||
d.litEnc.Reuse = huff0.ReusePolicyMust
|
||||
|
||||
@ -120,3 +146,16 @@ func loadDict(b []byte) (*dict, error) {
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
|
||||
func InspectDictionary(b []byte) (interface {
|
||||
ID() uint32
|
||||
ContentSize() int
|
||||
Content() []byte
|
||||
Offsets() [3]int
|
||||
LitEncoder() *huff0.Scratch
|
||||
}, error) {
|
||||
initPredefined()
|
||||
d, err := loadDict(b)
|
||||
return d, err
|
||||
}
|
||||
|
28
vendor/github.com/klauspost/compress/zstd/enc_base.go
generated
vendored
28
vendor/github.com/klauspost/compress/zstd/enc_base.go
generated
vendored
@ -16,6 +16,7 @@ type fastBase struct {
|
||||
cur int32
|
||||
// maximum offset. Should be at least 2x block size.
|
||||
maxMatchOff int32
|
||||
bufferReset int32
|
||||
hist []byte
|
||||
crc *xxhash.Digest
|
||||
tmp [8]byte
|
||||
@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
|
||||
}
|
||||
|
||||
func (e *fastBase) addBlock(src []byte) int32 {
|
||||
if debugAsserts && e.cur > bufferReset {
|
||||
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
|
||||
if debugAsserts && e.cur > e.bufferReset {
|
||||
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
|
||||
}
|
||||
// check if we have space already
|
||||
if len(e.hist)+len(src) > cap(e.hist) {
|
||||
@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
|
||||
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
|
||||
}
|
||||
}
|
||||
a := src[s:]
|
||||
b := src[t:]
|
||||
b = b[:len(a)]
|
||||
end := int32((len(a) >> 3) << 3)
|
||||
for i := int32(0); i < end; i += 8 {
|
||||
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
|
||||
return i + int32(bits.TrailingZeros64(diff)>>3)
|
||||
}
|
||||
}
|
||||
|
||||
a = a[end:]
|
||||
b = b[end:]
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int32(i) + end
|
||||
}
|
||||
}
|
||||
return int32(len(a)) + end
|
||||
return int32(matchLen(src[s:], src[t:]))
|
||||
}
|
||||
|
||||
// Reset the encoding table.
|
||||
@ -165,13 +149,13 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
|
||||
if singleBlock {
|
||||
e.lowMem = true
|
||||
}
|
||||
e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
|
||||
e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
|
||||
e.lowMem = low
|
||||
}
|
||||
|
||||
// We offset current position so everything will be out of reach.
|
||||
// If above reset line, history will be purged.
|
||||
if e.cur < bufferReset {
|
||||
if e.cur < e.bufferReset {
|
||||
e.cur += e.maxMatchOff + int32(len(e.hist))
|
||||
}
|
||||
e.hist = e.hist[:0]
|
||||
|
73
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
73
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
@ -84,14 +84,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = prevEntry{}
|
||||
}
|
||||
for i := range e.longTable[:] {
|
||||
e.longTable[i] = prevEntry{}
|
||||
}
|
||||
e.table = [bestShortTableSize]prevEntry{}
|
||||
e.longTable = [bestLongTableSize]prevEntry{}
|
||||
e.cur = e.maxMatchOff
|
||||
break
|
||||
}
|
||||
@ -192,12 +188,6 @@ encodeLoop:
|
||||
panic("offset0 was 0")
|
||||
}
|
||||
|
||||
bestOf := func(a, b match) match {
|
||||
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
const goodEnough = 100
|
||||
|
||||
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
|
||||
@ -205,36 +195,41 @@ encodeLoop:
|
||||
candidateL := e.longTable[nextHashL]
|
||||
candidateS := e.table[nextHashS]
|
||||
|
||||
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
|
||||
// Set m to a match at offset if it looks like that will improve compression.
|
||||
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
|
||||
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
|
||||
return match{s: s, est: highScore}
|
||||
return
|
||||
}
|
||||
if debugAsserts {
|
||||
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
|
||||
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
|
||||
}
|
||||
}
|
||||
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
|
||||
m.estBits(bitsPerByte)
|
||||
return m
|
||||
cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
|
||||
cand.estBits(bitsPerByte)
|
||||
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
|
||||
*m = cand
|
||||
}
|
||||
}
|
||||
|
||||
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
|
||||
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
|
||||
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
|
||||
best := match{s: s, est: highScore}
|
||||
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
|
||||
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
|
||||
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
|
||||
improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
|
||||
|
||||
if canRepeat && best.length < goodEnough {
|
||||
cv32 := uint32(cv >> 8)
|
||||
spp := s + 1
|
||||
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
|
||||
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
|
||||
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
|
||||
improve(&best, spp-offset1, spp, cv32, 1)
|
||||
improve(&best, spp-offset2, spp, cv32, 2)
|
||||
improve(&best, spp-offset3, spp, cv32, 3)
|
||||
if best.length > 0 {
|
||||
cv32 = uint32(cv >> 24)
|
||||
spp += 2
|
||||
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
|
||||
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
|
||||
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
|
||||
improve(&best, spp-offset1, spp, cv32, 1)
|
||||
improve(&best, spp-offset2, spp, cv32, 2)
|
||||
improve(&best, spp-offset3, spp, cv32, 3)
|
||||
}
|
||||
}
|
||||
// Load next and check...
|
||||
@ -261,28 +256,30 @@ encodeLoop:
|
||||
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
|
||||
|
||||
// Short at s+1
|
||||
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
|
||||
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
|
||||
// Long at s+1, s+2
|
||||
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
|
||||
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
|
||||
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
|
||||
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
|
||||
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
|
||||
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
|
||||
improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
|
||||
improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
|
||||
if false {
|
||||
// Short at s+3.
|
||||
// Too often worse...
|
||||
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
|
||||
improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
|
||||
}
|
||||
// See if we can find a better match by checking where the current best ends.
|
||||
// Use that offset to see if we can find a better full match.
|
||||
if sAt := best.s + best.length; sAt < sLimit {
|
||||
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
|
||||
candidateEnd := e.longTable[nextHashL]
|
||||
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
|
||||
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
|
||||
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
|
||||
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
|
||||
// Start check at a fixed offset to allow for a few mismatches.
|
||||
// For this compression level 2 yields the best results.
|
||||
const skipBeginning = 2
|
||||
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
|
||||
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
|
||||
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
|
||||
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
|
||||
}
|
||||
best = bestEnd
|
||||
}
|
||||
}
|
||||
}
|
||||
|
35
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
35
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
}
|
||||
for i := range e.longTable[:] {
|
||||
e.longTable[i] = prevEntry{}
|
||||
}
|
||||
e.table = [betterShortTableSize]tableEntry{}
|
||||
e.longTable = [betterLongTableSize]prevEntry{}
|
||||
e.cur = e.maxMatchOff
|
||||
break
|
||||
}
|
||||
@ -416,15 +412,23 @@ encodeLoop:
|
||||
|
||||
// Try to find a better match by searching for a long match at the end of the current best match
|
||||
if s+matched < sLimit {
|
||||
// Allow some bytes at the beginning to mismatch.
|
||||
// Sweet spot is around 3 bytes, but depends on input.
|
||||
// The skipped bytes are tested in Extend backwards,
|
||||
// and are still picked up as part of the match if they match.
|
||||
const skipBeginning = 3
|
||||
|
||||
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
|
||||
cv := load3232(src, s)
|
||||
s2 := s + skipBeginning
|
||||
cv := load3232(src, s2)
|
||||
candidateL := e.longTable[nextHashL]
|
||||
coffsetL := candidateL.offset - e.cur - matched
|
||||
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
coffsetL := candidateL.offset - e.cur - matched + skipBeginning
|
||||
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
// Found a long match, at least 4 bytes.
|
||||
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
|
||||
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
|
||||
if matchedNext > matched {
|
||||
t = coffsetL
|
||||
s = s2
|
||||
matched = matchedNext
|
||||
if debugMatches {
|
||||
println("long match at end-of-match")
|
||||
@ -434,12 +438,13 @@ encodeLoop:
|
||||
|
||||
// Check prev long...
|
||||
if true {
|
||||
coffsetL = candidateL.prev - e.cur - matched
|
||||
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
coffsetL = candidateL.prev - e.cur - matched + skipBeginning
|
||||
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
// Found a long match, at least 4 bytes.
|
||||
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
|
||||
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
|
||||
if matchedNext > matched {
|
||||
t = coffsetL
|
||||
s = s2
|
||||
matched = matchedNext
|
||||
if debugMatches {
|
||||
println("prev long match at end-of-match")
|
||||
@ -578,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
|
23
vendor/github.com/klauspost/compress/zstd/enc_dfast.go
generated
vendored
23
vendor/github.com/klauspost/compress/zstd/enc_dfast.go
generated
vendored
@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
}
|
||||
for i := range e.longTable[:] {
|
||||
e.longTable[i] = tableEntry{}
|
||||
}
|
||||
e.table = [dFastShortTableSize]tableEntry{}
|
||||
e.longTable = [dFastLongTableSize]tableEntry{}
|
||||
e.cur = e.maxMatchOff
|
||||
break
|
||||
}
|
||||
@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
if e.cur >= bufferReset {
|
||||
if e.cur >= e.bufferReset {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
}
|
||||
@ -685,7 +681,7 @@ encodeLoop:
|
||||
}
|
||||
|
||||
// We do not store history, so we must offset e.cur to avoid false matches for next user.
|
||||
if e.cur < bufferReset {
|
||||
if e.cur < e.bufferReset {
|
||||
e.cur += int32(len(src))
|
||||
}
|
||||
}
|
||||
@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
@ -1103,7 +1099,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
}
|
||||
|
||||
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
|
||||
copy(e.longTable[:], e.dictLongTable)
|
||||
//copy(e.longTable[:], e.dictLongTable)
|
||||
e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
|
||||
for i := range e.longTableShardDirty {
|
||||
e.longTableShardDirty[i] = false
|
||||
}
|
||||
@ -1114,7 +1111,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
continue
|
||||
}
|
||||
|
||||
copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
|
||||
// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
|
||||
*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
|
||||
|
||||
e.longTableShardDirty[i] = false
|
||||
}
|
||||
}
|
||||
|
20
vendor/github.com/klauspost/compress/zstd/enc_fast.go
generated
vendored
20
vendor/github.com/klauspost/compress/zstd/enc_fast.go
generated
vendored
@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
|
||||
)
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
@ -304,13 +304,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
|
||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||
)
|
||||
if debugEncoder {
|
||||
if len(src) > maxBlockSize {
|
||||
if len(src) > maxCompressedBlockSize {
|
||||
panic("src too big")
|
||||
}
|
||||
}
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
if e.cur >= bufferReset {
|
||||
if e.cur >= e.bufferReset {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
}
|
||||
@ -538,7 +538,7 @@ encodeLoop:
|
||||
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
|
||||
}
|
||||
// We do not store history, so we must offset e.cur to avoid false matches for next user.
|
||||
if e.cur < bufferReset {
|
||||
if e.cur < e.bufferReset {
|
||||
e.cur += int32(len(src))
|
||||
}
|
||||
}
|
||||
@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
|
||||
return
|
||||
}
|
||||
// Protect against e.cur wraparound.
|
||||
for e.cur >= bufferReset {
|
||||
for e.cur >= e.bufferReset-int32(len(e.hist)) {
|
||||
if len(e.hist) == 0 {
|
||||
for i := range e.table[:] {
|
||||
e.table[i] = tableEntry{}
|
||||
}
|
||||
e.table = [tableSize]tableEntry{}
|
||||
e.cur = e.maxMatchOff
|
||||
break
|
||||
}
|
||||
@ -871,7 +869,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
const shardCnt = tableShardCnt
|
||||
const shardSize = tableShardSize
|
||||
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
|
||||
copy(e.table[:], e.dictTable)
|
||||
//copy(e.table[:], e.dictTable)
|
||||
e.table = *(*[tableSize]tableEntry)(e.dictTable)
|
||||
for i := range e.tableShardDirty {
|
||||
e.tableShardDirty[i] = false
|
||||
}
|
||||
@ -883,7 +882,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
continue
|
||||
}
|
||||
|
||||
copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
|
||||
//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
|
||||
*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
|
||||
e.tableShardDirty[i] = false
|
||||
}
|
||||
e.allDirty = false
|
||||
|
39
vendor/github.com/klauspost/compress/zstd/encoder.go
generated
vendored
39
vendor/github.com/klauspost/compress/zstd/encoder.go
generated
vendored
@ -8,6 +8,7 @@ import (
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
rdebug "runtime/debug"
|
||||
"sync"
|
||||
|
||||
@ -528,8 +529,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
|
||||
// If a non-single block is needed the encoder will reset again.
|
||||
e.encoders <- enc
|
||||
}()
|
||||
// Use single segments when above minimum window and below 1MB.
|
||||
single := len(src) < 1<<20 && len(src) > MinWindowSize
|
||||
// Use single segments when above minimum window and below window size.
|
||||
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
|
||||
if e.o.single != nil {
|
||||
single = *e.o.single
|
||||
}
|
||||
@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// MaxEncodedSize returns the expected maximum
|
||||
// size of an encoded block or stream.
|
||||
func (e *Encoder) MaxEncodedSize(size int) int {
|
||||
frameHeader := 4 + 2 // magic + frame header & window descriptor
|
||||
if e.o.dict != nil {
|
||||
frameHeader += 4
|
||||
}
|
||||
// Frame content size:
|
||||
if size < 256 {
|
||||
frameHeader++
|
||||
} else if size < 65536+256 {
|
||||
frameHeader += 2
|
||||
} else if size < math.MaxInt32 {
|
||||
frameHeader += 4
|
||||
} else {
|
||||
frameHeader += 8
|
||||
}
|
||||
// Final crc
|
||||
if e.o.crc {
|
||||
frameHeader += 4
|
||||
}
|
||||
|
||||
// Max overhead is 3 bytes/block.
|
||||
// There cannot be 0 blocks.
|
||||
blocks := (size + e.o.blockSize) / e.o.blockSize
|
||||
|
||||
// Combine, add padding.
|
||||
maxSz := frameHeader + 3*blocks + size
|
||||
if e.o.pad > 1 {
|
||||
maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
|
||||
}
|
||||
return maxSz
|
||||
}
|
||||
|
38
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
38
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
@ -3,6 +3,8 @@ package zstd
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/bits"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder {
|
||||
switch o.level {
|
||||
case SpeedFastest:
|
||||
if o.dict != nil {
|
||||
return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
|
||||
return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
|
||||
}
|
||||
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
|
||||
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
|
||||
|
||||
case SpeedDefault:
|
||||
if o.dict != nil {
|
||||
return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
|
||||
return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
|
||||
}
|
||||
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
|
||||
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
|
||||
case SpeedBetterCompression:
|
||||
if o.dict != nil {
|
||||
return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
|
||||
return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
|
||||
}
|
||||
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
|
||||
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
|
||||
case SpeedBestCompression:
|
||||
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
|
||||
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
|
||||
}
|
||||
panic("unknown compression level")
|
||||
}
|
||||
@ -283,7 +285,7 @@ func WithNoEntropyCompression(b bool) EOption {
|
||||
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
|
||||
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
|
||||
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
|
||||
// If this is not specified, block encodes will automatically choose this based on the input size.
|
||||
// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
|
||||
// This setting has no effect on streamed encodes.
|
||||
func WithSingleSegment(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
|
||||
}
|
||||
|
||||
// WithEncoderDict registers a dictionary that will be used for encoding.
|
||||
//
|
||||
// The slice dict must be in the [dictionary format] produced by
|
||||
// "zstd --train" from the Zstandard reference implementation.
|
||||
//
|
||||
// The encoder *may* choose to use no dictionary instead for certain payloads.
|
||||
//
|
||||
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
|
||||
func WithEncoderDict(dict []byte) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
d, err := loadDict(dict)
|
||||
@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
|
||||
//
|
||||
// The slice content may contain arbitrary data. It will be used as an initial
|
||||
// history.
|
||||
func WithEncoderDictRaw(id uint32, content []byte) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
|
||||
return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
|
||||
}
|
||||
o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
104
vendor/github.com/klauspost/compress/zstd/framedec.go
generated
vendored
104
vendor/github.com/klauspost/compress/zstd/framedec.go
generated
vendored
@ -5,7 +5,7 @@
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"io"
|
||||
@ -29,7 +29,7 @@ type frameDec struct {
|
||||
|
||||
FrameContentSize uint64
|
||||
|
||||
DictionaryID *uint32
|
||||
DictionaryID uint32
|
||||
HasCheckSum bool
|
||||
SingleSegment bool
|
||||
}
|
||||
@ -43,9 +43,9 @@ const (
|
||||
MaxWindowSize = 1 << 29
|
||||
)
|
||||
|
||||
var (
|
||||
frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
|
||||
skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
|
||||
const (
|
||||
frameMagic = "\x28\xb5\x2f\xfd"
|
||||
skippableFrameMagic = "\x2a\x4d\x18"
|
||||
)
|
||||
|
||||
func newFrameDec(o decoderOptions) *frameDec {
|
||||
@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
copy(signature[1:], b)
|
||||
}
|
||||
|
||||
if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
|
||||
if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
|
||||
if debugDecoder {
|
||||
println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
|
||||
println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
|
||||
}
|
||||
// Break if not skippable frame.
|
||||
break
|
||||
@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
}
|
||||
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
|
||||
println("Skipping frame with", n, "bytes.")
|
||||
err = br.skipN(int(n))
|
||||
err = br.skipN(int64(n))
|
||||
if err != nil {
|
||||
if debugDecoder {
|
||||
println("Reading discarded frame", err)
|
||||
@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if !bytes.Equal(signature[:], frameMagic) {
|
||||
if string(signature[:]) != frameMagic {
|
||||
if debugDecoder {
|
||||
println("Got magic numbers: ", signature, "want:", frameMagic)
|
||||
println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
|
||||
}
|
||||
return ErrMagicMismatch
|
||||
}
|
||||
@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
|
||||
// Read Dictionary_ID
|
||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
|
||||
d.DictionaryID = nil
|
||||
d.DictionaryID = 0
|
||||
if size := fhd & 3; size != 0 {
|
||||
if size == 3 {
|
||||
size = 4
|
||||
@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
return err
|
||||
}
|
||||
var id uint32
|
||||
switch size {
|
||||
switch len(b) {
|
||||
case 1:
|
||||
id = uint32(b[0])
|
||||
case 2:
|
||||
@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
if debugDecoder {
|
||||
println("Dict size", size, "ID:", id)
|
||||
}
|
||||
if id > 0 {
|
||||
// ID 0 means "sorry, no dictionary anyway".
|
||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
|
||||
d.DictionaryID = &id
|
||||
}
|
||||
d.DictionaryID = id
|
||||
}
|
||||
|
||||
// Read Frame_Content_Size
|
||||
@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
println("Reading Frame content", err)
|
||||
return err
|
||||
}
|
||||
switch fcsSize {
|
||||
switch len(b) {
|
||||
case 1:
|
||||
d.FrameContentSize = uint64(b[0])
|
||||
case 2:
|
||||
@ -231,20 +227,27 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
d.crc.Reset()
|
||||
}
|
||||
|
||||
if d.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrWindowSizeExceeded
|
||||
}
|
||||
|
||||
if d.WindowSize == 0 && d.SingleSegment {
|
||||
// We may not need window in this case.
|
||||
d.WindowSize = d.FrameContentSize
|
||||
if d.WindowSize < MinWindowSize {
|
||||
d.WindowSize = MinWindowSize
|
||||
}
|
||||
if d.WindowSize > d.o.maxDecodedSize {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrDecoderSizeExceeded
|
||||
}
|
||||
}
|
||||
|
||||
if d.WindowSize > uint64(d.o.maxWindowSize) {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrWindowSizeExceeded
|
||||
}
|
||||
// The minimum Window_Size is 1 KB.
|
||||
if d.WindowSize < MinWindowSize {
|
||||
if debugDecoder {
|
||||
@ -254,11 +257,16 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
}
|
||||
d.history.windowSize = int(d.WindowSize)
|
||||
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
|
||||
// Alloc 2x window size if not low-mem, or very small window size.
|
||||
// Alloc 2x window size if not low-mem, or window size below 2MB.
|
||||
d.history.allocFrameBuffer = d.history.windowSize * 2
|
||||
} else {
|
||||
// Alloc with one additional block
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
||||
if d.o.lowMem {
|
||||
// Alloc with 1MB extra.
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
|
||||
} else {
|
||||
// Alloc with 2MB extra.
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
||||
}
|
||||
}
|
||||
|
||||
if debugDecoder {
|
||||
@ -293,7 +301,7 @@ func (d *frameDec) checkCRC() error {
|
||||
}
|
||||
|
||||
// We can overwrite upper tmp now
|
||||
want, err := d.rawInput.readSmall(4)
|
||||
buf, err := d.rawInput.readSmall(4)
|
||||
if err != nil {
|
||||
println("CRC missing?", err)
|
||||
return err
|
||||
@ -303,22 +311,17 @@ func (d *frameDec) checkCRC() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var tmp [4]byte
|
||||
got := d.crc.Sum64()
|
||||
// Flip to match file order.
|
||||
tmp[0] = byte(got >> 0)
|
||||
tmp[1] = byte(got >> 8)
|
||||
tmp[2] = byte(got >> 16)
|
||||
tmp[3] = byte(got >> 24)
|
||||
want := binary.LittleEndian.Uint32(buf[:4])
|
||||
got := uint32(d.crc.Sum64())
|
||||
|
||||
if !bytes.Equal(tmp[:], want) {
|
||||
if got != want {
|
||||
if debugDecoder {
|
||||
println("CRC Check Failed:", tmp[:], "!=", want)
|
||||
printf("CRC check failed: got %08x, want %08x\n", got, want)
|
||||
}
|
||||
return ErrCRCMismatch
|
||||
}
|
||||
if debugDecoder {
|
||||
println("CRC ok", tmp[:])
|
||||
printf("CRC ok %08x\n", got)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -336,7 +339,7 @@ func (d *frameDec) consumeCRC() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// runDecoder will create a sync decoder that will decode a block of data.
|
||||
// runDecoder will run the decoder for the remainder of the frame.
|
||||
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
saved := d.history.b
|
||||
|
||||
@ -346,12 +349,23 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
// Store input length, so we only check new data.
|
||||
crcStart := len(dst)
|
||||
d.history.decoders.maxSyncLen = 0
|
||||
if d.o.limitToCap {
|
||||
d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
|
||||
}
|
||||
if d.FrameContentSize != fcsUnknown {
|
||||
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
|
||||
if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
|
||||
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
|
||||
}
|
||||
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
|
||||
if debugDecoder {
|
||||
println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
|
||||
if debugDecoder {
|
||||
println("maxSyncLen:", d.history.decoders.maxSyncLen)
|
||||
}
|
||||
if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
|
||||
// Alloc for output
|
||||
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
|
||||
copy(dst2, dst)
|
||||
@ -371,7 +385,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if uint64(len(d.history.b)) > d.o.maxDecodedSize {
|
||||
if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
|
||||
println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
|
||||
err = ErrDecoderSizeExceeded
|
||||
break
|
||||
}
|
||||
if d.o.limitToCap && len(d.history.b) > cap(dst) {
|
||||
println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
|
||||
err = ErrDecoderSizeExceeded
|
||||
break
|
||||
}
|
||||
|
7
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
generated
vendored
7
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
generated
vendored
@ -21,7 +21,8 @@ type buildDtableAsmContext struct {
|
||||
|
||||
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
|
||||
// Function returns non-zero exit code on error.
|
||||
// go:noescape
|
||||
//
|
||||
//go:noescape
|
||||
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||
|
||||
// please keep in sync with _generate/gen_fse.go
|
||||
@ -34,8 +35,8 @@ const (
|
||||
// buildDtable will build the decoding table.
|
||||
func (s *fseDecoder) buildDtable() error {
|
||||
ctx := buildDtableAsmContext{
|
||||
stateTable: (*uint16)(&s.stateTable[0]),
|
||||
norm: (*int16)(&s.norm[0]),
|
||||
stateTable: &s.stateTable[0],
|
||||
norm: &s.norm[0],
|
||||
dt: (*uint64)(&s.dt[0]),
|
||||
}
|
||||
code := buildDtable_asm(s, &ctx)
|
||||
|
1
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
generated
vendored
1
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
generated
vendored
@ -1,7 +1,6 @@
|
||||
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
|
||||
|
||||
//go:build !appengine && !noasm && gc && !noasm
|
||||
// +build !appengine,!noasm,gc,!noasm
|
||||
|
||||
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||
TEXT ·buildDtable_asm(SB), $0-24
|
||||
|
25
vendor/github.com/klauspost/compress/zstd/history.go
generated
vendored
25
vendor/github.com/klauspost/compress/zstd/history.go
generated
vendored
@ -37,26 +37,23 @@ func (h *history) reset() {
|
||||
h.ignoreBuffer = 0
|
||||
h.error = false
|
||||
h.recentOffsets = [3]int{1, 4, 8}
|
||||
if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
h.decoders.freeDecoders()
|
||||
h.decoders = sequenceDecs{br: h.decoders.br}
|
||||
if h.huffTree != nil {
|
||||
if h.dict == nil || h.dict.litEnc != h.huffTree {
|
||||
huffDecoderPool.Put(h.huffTree)
|
||||
}
|
||||
}
|
||||
h.freeHuffDecoder()
|
||||
h.huffTree = nil
|
||||
h.dict = nil
|
||||
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
|
||||
}
|
||||
|
||||
func (h *history) freeHuffDecoder() {
|
||||
if h.huffTree != nil {
|
||||
if h.dict == nil || h.dict.litEnc != h.huffTree {
|
||||
huffDecoderPool.Put(h.huffTree)
|
||||
h.huffTree = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *history) setDict(dict *dict) {
|
||||
if dict == nil {
|
||||
return
|
||||
|
49
vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
generated
vendored
49
vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
generated
vendored
@ -2,12 +2,7 @@
|
||||
|
||||
VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
|
||||
|
||||
|
||||
[GoDoc](https://godoc.org/github.com/cespare/xxhash)
|
||||
[Build Status](https://travis-ci.org/cespare/xxhash)
|
||||
|
||||
xxhash is a Go implementation of the 64-bit
|
||||
[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
|
||||
xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
|
||||
high-quality hashing algorithm that is much faster than anything in the Go
|
||||
standard library.
|
||||
|
||||
@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
|
||||
func (*Digest) Sum64() uint64
|
||||
```
|
||||
|
||||
This implementation provides a fast pure-Go implementation and an even faster
|
||||
assembly implementation for amd64.
|
||||
The package is written with optimized pure Go and also contains even faster
|
||||
assembly implementations for amd64 and arm64. If desired, the `purego` build tag
|
||||
opts into using the Go code even on those architectures.
|
||||
|
||||
[xxHash]: http://cyan4973.github.io/xxHash/
|
||||
|
||||
## Compatibility
|
||||
|
||||
This package is in a module and the latest code is in version 2 of the module.
|
||||
You need a version of Go with at least "minimal module compatibility" to use
|
||||
github.com/cespare/xxhash/v2:
|
||||
|
||||
* 1.9.7+ for Go 1.9
|
||||
* 1.10.3+ for Go 1.10
|
||||
* Go 1.11 or later
|
||||
|
||||
I recommend using the latest release of Go.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Here are some quick benchmarks comparing the pure-Go and assembly
|
||||
implementations of Sum64.
|
||||
|
||||
| input size | purego | asm |
|
||||
| --- | --- | --- |
|
||||
| 5 B | 979.66 MB/s | 1291.17 MB/s |
|
||||
| 100 B | 7475.26 MB/s | 7973.40 MB/s |
|
||||
| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
|
||||
| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
|
||||
| input size | purego | asm |
|
||||
| ---------- | --------- | --------- |
|
||||
| 4 B | 1.3 GB/s | 1.2 GB/s |
|
||||
| 16 B | 2.9 GB/s | 3.5 GB/s |
|
||||
| 100 B | 6.9 GB/s | 8.1 GB/s |
|
||||
| 4 KB | 11.7 GB/s | 16.7 GB/s |
|
||||
| 10 MB | 12.0 GB/s | 17.3 GB/s |
|
||||
|
||||
These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
|
||||
the following commands under Go 1.11.2:
|
||||
These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
|
||||
CPU using the following commands under Go 1.19.2:
|
||||
|
||||
```
|
||||
$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
|
||||
$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
|
||||
benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
|
||||
benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
|
||||
```
|
||||
|
||||
## Projects using this package
|
||||
|
||||
- [InfluxDB](https://github.com/influxdata/influxdb)
|
||||
- [Prometheus](https://github.com/prometheus/prometheus)
|
||||
- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
- [FreeCache](https://github.com/coocood/freecache)
|
||||
- [FastCache](https://github.com/VictoriaMetrics/fastcache)
|
||||
|
47
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
generated
vendored
47
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
generated
vendored
@ -18,19 +18,11 @@ const (
|
||||
prime5 uint64 = 2870177450012600261
|
||||
)
|
||||
|
||||
// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
|
||||
// possible in the Go code is worth a small (but measurable) performance boost
|
||||
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
|
||||
// convenience in the Go code in a few places where we need to intentionally
|
||||
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
|
||||
// result overflows a uint64).
|
||||
var (
|
||||
prime1v = prime1
|
||||
prime2v = prime2
|
||||
prime3v = prime3
|
||||
prime4v = prime4
|
||||
prime5v = prime5
|
||||
)
|
||||
// Store the primes in an array as well.
|
||||
//
|
||||
// The consts are used when possible in Go code to avoid MOVs but we need a
|
||||
// contiguous array for the assembly code.
|
||||
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
|
||||
|
||||
// Digest implements hash.Hash64.
|
||||
type Digest struct {
|
||||
@ -52,10 +44,10 @@ func New() *Digest {
|
||||
|
||||
// Reset clears the Digest's state so that it can be reused.
|
||||
func (d *Digest) Reset() {
|
||||
d.v1 = prime1v + prime2
|
||||
d.v1 = primes[0] + prime2
|
||||
d.v2 = prime2
|
||||
d.v3 = 0
|
||||
d.v4 = -prime1v
|
||||
d.v4 = -primes[0]
|
||||
d.total = 0
|
||||
d.n = 0
|
||||
}
|
||||
@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
|
||||
n = len(b)
|
||||
d.total += uint64(n)
|
||||
|
||||
memleft := d.mem[d.n&(len(d.mem)-1):]
|
||||
|
||||
if d.n+n < 32 {
|
||||
// This new data doesn't even fill the current block.
|
||||
copy(d.mem[d.n:], b)
|
||||
copy(memleft, b)
|
||||
d.n += n
|
||||
return
|
||||
}
|
||||
|
||||
if d.n > 0 {
|
||||
// Finish off the partial block.
|
||||
copy(d.mem[d.n:], b)
|
||||
c := copy(memleft, b)
|
||||
d.v1 = round(d.v1, u64(d.mem[0:8]))
|
||||
d.v2 = round(d.v2, u64(d.mem[8:16]))
|
||||
d.v3 = round(d.v3, u64(d.mem[16:24]))
|
||||
d.v4 = round(d.v4, u64(d.mem[24:32]))
|
||||
b = b[32-d.n:]
|
||||
b = b[c:]
|
||||
d.n = 0
|
||||
}
|
||||
|
||||
@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
|
||||
|
||||
h += d.total
|
||||
|
||||
i, end := 0, d.n
|
||||
for ; i+8 <= end; i += 8 {
|
||||
k1 := round(0, u64(d.mem[i:i+8]))
|
||||
b := d.mem[:d.n&(len(d.mem)-1)]
|
||||
for ; len(b) >= 8; b = b[8:] {
|
||||
k1 := round(0, u64(b[:8]))
|
||||
h ^= k1
|
||||
h = rol27(h)*prime1 + prime4
|
||||
}
|
||||
if i+4 <= end {
|
||||
h ^= uint64(u32(d.mem[i:i+4])) * prime1
|
||||
if len(b) >= 4 {
|
||||
h ^= uint64(u32(b[:4])) * prime1
|
||||
h = rol23(h)*prime2 + prime3
|
||||
i += 4
|
||||
b = b[4:]
|
||||
}
|
||||
for i < end {
|
||||
h ^= uint64(d.mem[i]) * prime5
|
||||
for ; len(b) > 0; b = b[1:] {
|
||||
h ^= uint64(b[0]) * prime5
|
||||
h = rol11(h) * prime1
|
||||
i++
|
||||
}
|
||||
|
||||
h ^= h >> 33
|
||||
|
308
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
generated
vendored
308
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
generated
vendored
@ -1,3 +1,4 @@
|
||||
//go:build !appengine && gc && !purego && !noasm
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !purego
|
||||
@ -5,212 +6,205 @@
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation:
|
||||
// AX h
|
||||
// SI pointer to advance through b
|
||||
// DX n
|
||||
// BX loop end
|
||||
// R8 v1, k1
|
||||
// R9 v2
|
||||
// R10 v3
|
||||
// R11 v4
|
||||
// R12 tmp
|
||||
// R13 prime1v
|
||||
// R14 prime2v
|
||||
// DI prime4v
|
||||
// Registers:
|
||||
#define h AX
|
||||
#define d AX
|
||||
#define p SI // pointer to advance through b
|
||||
#define n DX
|
||||
#define end BX // loop end
|
||||
#define v1 R8
|
||||
#define v2 R9
|
||||
#define v3 R10
|
||||
#define v4 R11
|
||||
#define x R12
|
||||
#define prime1 R13
|
||||
#define prime2 R14
|
||||
#define prime4 DI
|
||||
|
||||
// round reads from and advances the buffer pointer in SI.
|
||||
// It assumes that R13 has prime1v and R14 has prime2v.
|
||||
#define round(r) \
|
||||
MOVQ (SI), R12 \
|
||||
ADDQ $8, SI \
|
||||
IMULQ R14, R12 \
|
||||
ADDQ R12, r \
|
||||
ROLQ $31, r \
|
||||
IMULQ R13, r
|
||||
#define round(acc, x) \
|
||||
IMULQ prime2, x \
|
||||
ADDQ x, acc \
|
||||
ROLQ $31, acc \
|
||||
IMULQ prime1, acc
|
||||
|
||||
// mergeRound applies a merge round on the two registers acc and val.
|
||||
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
|
||||
#define mergeRound(acc, val) \
|
||||
IMULQ R14, val \
|
||||
ROLQ $31, val \
|
||||
IMULQ R13, val \
|
||||
XORQ val, acc \
|
||||
IMULQ R13, acc \
|
||||
ADDQ DI, acc
|
||||
// round0 performs the operation x = round(0, x).
|
||||
#define round0(x) \
|
||||
IMULQ prime2, x \
|
||||
ROLQ $31, x \
|
||||
IMULQ prime1, x
|
||||
|
||||
// mergeRound applies a merge round on the two registers acc and x.
|
||||
// It assumes that prime1, prime2, and prime4 have been loaded.
|
||||
#define mergeRound(acc, x) \
|
||||
round0(x) \
|
||||
XORQ x, acc \
|
||||
IMULQ prime1, acc \
|
||||
ADDQ prime4, acc
|
||||
|
||||
// blockLoop processes as many 32-byte blocks as possible,
|
||||
// updating v1, v2, v3, and v4. It assumes that there is at least one block
|
||||
// to process.
|
||||
#define blockLoop() \
|
||||
loop: \
|
||||
MOVQ +0(p), x \
|
||||
round(v1, x) \
|
||||
MOVQ +8(p), x \
|
||||
round(v2, x) \
|
||||
MOVQ +16(p), x \
|
||||
round(v3, x) \
|
||||
MOVQ +24(p), x \
|
||||
round(v4, x) \
|
||||
ADDQ $32, p \
|
||||
CMPQ p, end \
|
||||
JLE loop
|
||||
|
||||
// func Sum64(b []byte) uint64
|
||||
TEXT ·Sum64(SB), NOSPLIT, $0-32
|
||||
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
|
||||
// Load fixed primes.
|
||||
MOVQ ·prime1v(SB), R13
|
||||
MOVQ ·prime2v(SB), R14
|
||||
MOVQ ·prime4v(SB), DI
|
||||
MOVQ ·primes+0(SB), prime1
|
||||
MOVQ ·primes+8(SB), prime2
|
||||
MOVQ ·primes+24(SB), prime4
|
||||
|
||||
// Load slice.
|
||||
MOVQ b_base+0(FP), SI
|
||||
MOVQ b_len+8(FP), DX
|
||||
LEAQ (SI)(DX*1), BX
|
||||
MOVQ b_base+0(FP), p
|
||||
MOVQ b_len+8(FP), n
|
||||
LEAQ (p)(n*1), end
|
||||
|
||||
// The first loop limit will be len(b)-32.
|
||||
SUBQ $32, BX
|
||||
SUBQ $32, end
|
||||
|
||||
// Check whether we have at least one block.
|
||||
CMPQ DX, $32
|
||||
CMPQ n, $32
|
||||
JLT noBlocks
|
||||
|
||||
// Set up initial state (v1, v2, v3, v4).
|
||||
MOVQ R13, R8
|
||||
ADDQ R14, R8
|
||||
MOVQ R14, R9
|
||||
XORQ R10, R10
|
||||
XORQ R11, R11
|
||||
SUBQ R13, R11
|
||||
MOVQ prime1, v1
|
||||
ADDQ prime2, v1
|
||||
MOVQ prime2, v2
|
||||
XORQ v3, v3
|
||||
XORQ v4, v4
|
||||
SUBQ prime1, v4
|
||||
|
||||
// Loop until SI > BX.
|
||||
blockLoop:
|
||||
round(R8)
|
||||
round(R9)
|
||||
round(R10)
|
||||
round(R11)
|
||||
blockLoop()
|
||||
|
||||
CMPQ SI, BX
|
||||
JLE blockLoop
|
||||
MOVQ v1, h
|
||||
ROLQ $1, h
|
||||
MOVQ v2, x
|
||||
ROLQ $7, x
|
||||
ADDQ x, h
|
||||
MOVQ v3, x
|
||||
ROLQ $12, x
|
||||
ADDQ x, h
|
||||
MOVQ v4, x
|
||||
ROLQ $18, x
|
||||
ADDQ x, h
|
||||
|
||||
MOVQ R8, AX
|
||||
ROLQ $1, AX
|
||||
MOVQ R9, R12
|
||||
ROLQ $7, R12
|
||||
ADDQ R12, AX
|
||||
MOVQ R10, R12
|
||||
ROLQ $12, R12
|
||||
ADDQ R12, AX
|
||||
MOVQ R11, R12
|
||||
ROLQ $18, R12
|
||||
ADDQ R12, AX
|
||||
|
||||
mergeRound(AX, R8)
|
||||
mergeRound(AX, R9)
|
||||
mergeRound(AX, R10)
|
||||
mergeRound(AX, R11)
|
||||
mergeRound(h, v1)
|
||||
mergeRound(h, v2)
|
||||
mergeRound(h, v3)
|
||||
mergeRound(h, v4)
|
||||
|
||||
JMP afterBlocks
|
||||
|
||||
noBlocks:
|
||||
MOVQ ·prime5v(SB), AX
|
||||
MOVQ ·primes+32(SB), h
|
||||
|
||||
afterBlocks:
|
||||
ADDQ DX, AX
|
||||
ADDQ n, h
|
||||
|
||||
// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
|
||||
ADDQ $24, BX
|
||||
ADDQ $24, end
|
||||
CMPQ p, end
|
||||
JG try4
|
||||
|
||||
CMPQ SI, BX
|
||||
JG fourByte
|
||||
loop8:
|
||||
MOVQ (p), x
|
||||
ADDQ $8, p
|
||||
round0(x)
|
||||
XORQ x, h
|
||||
ROLQ $27, h
|
||||
IMULQ prime1, h
|
||||
ADDQ prime4, h
|
||||
|
||||
wordLoop:
|
||||
// Calculate k1.
|
||||
MOVQ (SI), R8
|
||||
ADDQ $8, SI
|
||||
IMULQ R14, R8
|
||||
ROLQ $31, R8
|
||||
IMULQ R13, R8
|
||||
CMPQ p, end
|
||||
JLE loop8
|
||||
|
||||
XORQ R8, AX
|
||||
ROLQ $27, AX
|
||||
IMULQ R13, AX
|
||||
ADDQ DI, AX
|
||||
try4:
|
||||
ADDQ $4, end
|
||||
CMPQ p, end
|
||||
JG try1
|
||||
|
||||
CMPQ SI, BX
|
||||
JLE wordLoop
|
||||
MOVL (p), x
|
||||
ADDQ $4, p
|
||||
IMULQ prime1, x
|
||||
XORQ x, h
|
||||
|
||||
fourByte:
|
||||
ADDQ $4, BX
|
||||
CMPQ SI, BX
|
||||
JG singles
|
||||
ROLQ $23, h
|
||||
IMULQ prime2, h
|
||||
ADDQ ·primes+16(SB), h
|
||||
|
||||
MOVL (SI), R8
|
||||
ADDQ $4, SI
|
||||
IMULQ R13, R8
|
||||
XORQ R8, AX
|
||||
|
||||
ROLQ $23, AX
|
||||
IMULQ R14, AX
|
||||
ADDQ ·prime3v(SB), AX
|
||||
|
||||
singles:
|
||||
ADDQ $4, BX
|
||||
CMPQ SI, BX
|
||||
try1:
|
||||
ADDQ $4, end
|
||||
CMPQ p, end
|
||||
JGE finalize
|
||||
|
||||
singlesLoop:
|
||||
MOVBQZX (SI), R12
|
||||
ADDQ $1, SI
|
||||
IMULQ ·prime5v(SB), R12
|
||||
XORQ R12, AX
|
||||
loop1:
|
||||
MOVBQZX (p), x
|
||||
ADDQ $1, p
|
||||
IMULQ ·primes+32(SB), x
|
||||
XORQ x, h
|
||||
ROLQ $11, h
|
||||
IMULQ prime1, h
|
||||
|
||||
ROLQ $11, AX
|
||||
IMULQ R13, AX
|
||||
|
||||
CMPQ SI, BX
|
||||
JL singlesLoop
|
||||
CMPQ p, end
|
||||
JL loop1
|
||||
|
||||
finalize:
|
||||
MOVQ AX, R12
|
||||
SHRQ $33, R12
|
||||
XORQ R12, AX
|
||||
IMULQ R14, AX
|
||||
MOVQ AX, R12
|
||||
SHRQ $29, R12
|
||||
XORQ R12, AX
|
||||
IMULQ ·prime3v(SB), AX
|
||||
MOVQ AX, R12
|
||||
SHRQ $32, R12
|
||||
XORQ R12, AX
|
||||
MOVQ h, x
|
||||
SHRQ $33, x
|
||||
XORQ x, h
|
||||
IMULQ prime2, h
|
||||
MOVQ h, x
|
||||
SHRQ $29, x
|
||||
XORQ x, h
|
||||
IMULQ ·primes+16(SB), h
|
||||
MOVQ h, x
|
||||
SHRQ $32, x
|
||||
XORQ x, h
|
||||
|
||||
MOVQ AX, ret+24(FP)
|
||||
MOVQ h, ret+24(FP)
|
||||
RET
|
||||
|
||||
// writeBlocks uses the same registers as above except that it uses AX to store
|
||||
// the d pointer.
|
||||
|
||||
// func writeBlocks(d *Digest, b []byte) int
|
||||
TEXT ·writeBlocks(SB), NOSPLIT, $0-40
|
||||
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
|
||||
// Load fixed primes needed for round.
|
||||
MOVQ ·prime1v(SB), R13
|
||||
MOVQ ·prime2v(SB), R14
|
||||
MOVQ ·primes+0(SB), prime1
|
||||
MOVQ ·primes+8(SB), prime2
|
||||
|
||||
// Load slice.
|
||||
MOVQ b_base+8(FP), SI
|
||||
MOVQ b_len+16(FP), DX
|
||||
LEAQ (SI)(DX*1), BX
|
||||
SUBQ $32, BX
|
||||
MOVQ b_base+8(FP), p
|
||||
MOVQ b_len+16(FP), n
|
||||
LEAQ (p)(n*1), end
|
||||
SUBQ $32, end
|
||||
|
||||
// Load vN from d.
|
||||
MOVQ d+0(FP), AX
|
||||
MOVQ 0(AX), R8 // v1
|
||||
MOVQ 8(AX), R9 // v2
|
||||
MOVQ 16(AX), R10 // v3
|
||||
MOVQ 24(AX), R11 // v4
|
||||
MOVQ s+0(FP), d
|
||||
MOVQ 0(d), v1
|
||||
MOVQ 8(d), v2
|
||||
MOVQ 16(d), v3
|
||||
MOVQ 24(d), v4
|
||||
|
||||
// We don't need to check the loop condition here; this function is
|
||||
// always called with at least one block of data to process.
|
||||
blockLoop:
|
||||
round(R8)
|
||||
round(R9)
|
||||
round(R10)
|
||||
round(R11)
|
||||
|
||||
CMPQ SI, BX
|
||||
JLE blockLoop
|
||||
blockLoop()
|
||||
|
||||
// Copy vN back to d.
|
||||
MOVQ R8, 0(AX)
|
||||
MOVQ R9, 8(AX)
|
||||
MOVQ R10, 16(AX)
|
||||
MOVQ R11, 24(AX)
|
||||
MOVQ v1, 0(d)
|
||||
MOVQ v2, 8(d)
|
||||
MOVQ v3, 16(d)
|
||||
MOVQ v4, 24(d)
|
||||
|
||||
// The number of bytes written is SI minus the old base pointer.
|
||||
SUBQ b_base+8(FP), SI
|
||||
MOVQ SI, ret+32(FP)
|
||||
// The number of bytes written is p minus the old base pointer.
|
||||
SUBQ b_base+8(FP), p
|
||||
MOVQ p, ret+32(FP)
|
||||
|
||||
RET
|
||||
|
136
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
generated
vendored
136
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
generated
vendored
@ -1,13 +1,17 @@
|
||||
// +build gc,!purego,!noasm
|
||||
//go:build !appengine && gc && !purego && !noasm
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !purego
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
// Registers:
|
||||
#define digest R1
|
||||
#define h R2 // Return value.
|
||||
#define p R3 // Input pointer.
|
||||
#define len R4
|
||||
#define nblocks R5 // len / 32.
|
||||
#define h R2 // return value
|
||||
#define p R3 // input pointer
|
||||
#define n R4 // input length
|
||||
#define nblocks R5 // n / 32
|
||||
#define prime1 R7
|
||||
#define prime2 R8
|
||||
#define prime3 R9
|
||||
@ -25,60 +29,52 @@
|
||||
#define round(acc, x) \
|
||||
MADD prime2, acc, x, acc \
|
||||
ROR $64-31, acc \
|
||||
MUL prime1, acc \
|
||||
MUL prime1, acc
|
||||
|
||||
// x = round(0, x).
|
||||
// round0 performs the operation x = round(0, x).
|
||||
#define round0(x) \
|
||||
MUL prime2, x \
|
||||
ROR $64-31, x \
|
||||
MUL prime1, x \
|
||||
MUL prime1, x
|
||||
|
||||
#define mergeRound(x) \
|
||||
round0(x) \
|
||||
EOR x, h \
|
||||
MADD h, prime4, prime1, h \
|
||||
#define mergeRound(acc, x) \
|
||||
round0(x) \
|
||||
EOR x, acc \
|
||||
MADD acc, prime4, prime1, acc
|
||||
|
||||
// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
|
||||
#define blocksLoop() \
|
||||
LSR $5, len, nblocks \
|
||||
PCALIGN $16 \
|
||||
loop: \
|
||||
LDP.P 32(p), (x1, x2) \
|
||||
round(v1, x1) \
|
||||
LDP -16(p), (x3, x4) \
|
||||
round(v2, x2) \
|
||||
SUB $1, nblocks \
|
||||
round(v3, x3) \
|
||||
round(v4, x4) \
|
||||
CBNZ nblocks, loop \
|
||||
|
||||
// The primes are repeated here to ensure that they're stored
|
||||
// in a contiguous array, so we can load them with LDP.
|
||||
DATA primes<> +0(SB)/8, $11400714785074694791
|
||||
DATA primes<> +8(SB)/8, $14029467366897019727
|
||||
DATA primes<>+16(SB)/8, $1609587929392839161
|
||||
DATA primes<>+24(SB)/8, $9650029242287828579
|
||||
DATA primes<>+32(SB)/8, $2870177450012600261
|
||||
GLOBL primes<>(SB), NOPTR+RODATA, $40
|
||||
// blockLoop processes as many 32-byte blocks as possible,
|
||||
// updating v1, v2, v3, and v4. It assumes that n >= 32.
|
||||
#define blockLoop() \
|
||||
LSR $5, n, nblocks \
|
||||
PCALIGN $16 \
|
||||
loop: \
|
||||
LDP.P 16(p), (x1, x2) \
|
||||
LDP.P 16(p), (x3, x4) \
|
||||
round(v1, x1) \
|
||||
round(v2, x2) \
|
||||
round(v3, x3) \
|
||||
round(v4, x4) \
|
||||
SUB $1, nblocks \
|
||||
CBNZ nblocks, loop
|
||||
|
||||
// func Sum64(b []byte) uint64
|
||||
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
|
||||
LDP b_base+0(FP), (p, len)
|
||||
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
|
||||
LDP b_base+0(FP), (p, n)
|
||||
|
||||
LDP primes<> +0(SB), (prime1, prime2)
|
||||
LDP primes<>+16(SB), (prime3, prime4)
|
||||
MOVD primes<>+32(SB), prime5
|
||||
LDP ·primes+0(SB), (prime1, prime2)
|
||||
LDP ·primes+16(SB), (prime3, prime4)
|
||||
MOVD ·primes+32(SB), prime5
|
||||
|
||||
CMP $32, len
|
||||
CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
|
||||
BLO afterLoop
|
||||
CMP $32, n
|
||||
CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
|
||||
BLT afterLoop
|
||||
|
||||
ADD prime1, prime2, v1
|
||||
MOVD prime2, v2
|
||||
MOVD $0, v3
|
||||
NEG prime1, v4
|
||||
|
||||
blocksLoop()
|
||||
blockLoop()
|
||||
|
||||
ROR $64-1, v1, x1
|
||||
ROR $64-7, v2, x2
|
||||
@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
|
||||
ADD x3, x4
|
||||
ADD x2, x4, h
|
||||
|
||||
mergeRound(v1)
|
||||
mergeRound(v2)
|
||||
mergeRound(v3)
|
||||
mergeRound(v4)
|
||||
mergeRound(h, v1)
|
||||
mergeRound(h, v2)
|
||||
mergeRound(h, v3)
|
||||
mergeRound(h, v4)
|
||||
|
||||
afterLoop:
|
||||
ADD len, h
|
||||
ADD n, h
|
||||
|
||||
TBZ $4, len, try8
|
||||
TBZ $4, n, try8
|
||||
LDP.P 16(p), (x1, x2)
|
||||
|
||||
round0(x1)
|
||||
|
||||
// NOTE: here and below, sequencing the EOR after the ROR (using a
|
||||
// rotated register) is worth a small but measurable speedup for small
|
||||
// inputs.
|
||||
ROR $64-27, h
|
||||
EOR x1 @> 64-27, h, h
|
||||
MADD h, prime4, prime1, h
|
||||
|
||||
round0(x2)
|
||||
ROR $64-27, h
|
||||
EOR x2 @> 64-27, h
|
||||
EOR x2 @> 64-27, h, h
|
||||
MADD h, prime4, prime1, h
|
||||
|
||||
try8:
|
||||
TBZ $3, len, try4
|
||||
TBZ $3, n, try4
|
||||
MOVD.P 8(p), x1
|
||||
|
||||
round0(x1)
|
||||
ROR $64-27, h
|
||||
EOR x1 @> 64-27, h
|
||||
EOR x1 @> 64-27, h, h
|
||||
MADD h, prime4, prime1, h
|
||||
|
||||
try4:
|
||||
TBZ $2, len, try2
|
||||
TBZ $2, n, try2
|
||||
MOVWU.P 4(p), x2
|
||||
|
||||
MUL prime1, x2
|
||||
ROR $64-23, h
|
||||
EOR x2 @> 64-23, h
|
||||
EOR x2 @> 64-23, h, h
|
||||
MADD h, prime3, prime2, h
|
||||
|
||||
try2:
|
||||
TBZ $1, len, try1
|
||||
TBZ $1, n, try1
|
||||
MOVHU.P 2(p), x3
|
||||
AND $255, x3, x1
|
||||
LSR $8, x3, x2
|
||||
|
||||
MUL prime5, x1
|
||||
ROR $64-11, h
|
||||
EOR x1 @> 64-11, h
|
||||
EOR x1 @> 64-11, h, h
|
||||
MUL prime1, h
|
||||
|
||||
MUL prime5, x2
|
||||
ROR $64-11, h
|
||||
EOR x2 @> 64-11, h
|
||||
EOR x2 @> 64-11, h, h
|
||||
MUL prime1, h
|
||||
|
||||
try1:
|
||||
TBZ $0, len, end
|
||||
TBZ $0, n, finalize
|
||||
MOVBU (p), x4
|
||||
|
||||
MUL prime5, x4
|
||||
ROR $64-11, h
|
||||
EOR x4 @> 64-11, h
|
||||
EOR x4 @> 64-11, h, h
|
||||
MUL prime1, h
|
||||
|
||||
end:
|
||||
finalize:
|
||||
EOR h >> 33, h
|
||||
MUL prime2, h
|
||||
EOR h >> 29, h
|
||||
@ -163,24 +163,22 @@ end:
|
||||
RET
|
||||
|
||||
// func writeBlocks(d *Digest, b []byte) int
|
||||
//
|
||||
// Assumes len(b) >= 32.
|
||||
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
|
||||
LDP primes<>(SB), (prime1, prime2)
|
||||
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
|
||||
LDP ·primes+0(SB), (prime1, prime2)
|
||||
|
||||
// Load state. Assume v[1-4] are stored contiguously.
|
||||
MOVD d+0(FP), digest
|
||||
LDP 0(digest), (v1, v2)
|
||||
LDP 16(digest), (v3, v4)
|
||||
|
||||
LDP b_base+8(FP), (p, len)
|
||||
LDP b_base+8(FP), (p, n)
|
||||
|
||||
blocksLoop()
|
||||
blockLoop()
|
||||
|
||||
// Store updated state.
|
||||
STP (v1, v2), 0(digest)
|
||||
STP (v3, v4), 16(digest)
|
||||
|
||||
BIC $31, len
|
||||
MOVD len, ret+32(FP)
|
||||
BIC $31, n
|
||||
MOVD n, ret+32(FP)
|
||||
RET
|
||||
|
2
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
generated
vendored
@ -13,4 +13,4 @@ package xxhash
|
||||
func Sum64(b []byte) uint64
|
||||
|
||||
//go:noescape
|
||||
func writeBlocks(d *Digest, b []byte) int
|
||||
func writeBlocks(s *Digest, b []byte) int
|
||||
|
19
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
generated
vendored
19
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
generated
vendored
@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
|
||||
var h uint64
|
||||
|
||||
if n >= 32 {
|
||||
v1 := prime1v + prime2
|
||||
v1 := primes[0] + prime2
|
||||
v2 := prime2
|
||||
v3 := uint64(0)
|
||||
v4 := -prime1v
|
||||
v4 := -primes[0]
|
||||
for len(b) >= 32 {
|
||||
v1 = round(v1, u64(b[0:8:len(b)]))
|
||||
v2 = round(v2, u64(b[8:16:len(b)]))
|
||||
@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
|
||||
|
||||
h += uint64(n)
|
||||
|
||||
i, end := 0, len(b)
|
||||
for ; i+8 <= end; i += 8 {
|
||||
k1 := round(0, u64(b[i:i+8:len(b)]))
|
||||
for ; len(b) >= 8; b = b[8:] {
|
||||
k1 := round(0, u64(b[:8]))
|
||||
h ^= k1
|
||||
h = rol27(h)*prime1 + prime4
|
||||
}
|
||||
if i+4 <= end {
|
||||
h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
|
||||
if len(b) >= 4 {
|
||||
h ^= uint64(u32(b[:4])) * prime1
|
||||
h = rol23(h)*prime2 + prime3
|
||||
i += 4
|
||||
b = b[4:]
|
||||
}
|
||||
for ; i < end; i++ {
|
||||
h ^= uint64(b[i]) * prime5
|
||||
for ; len(b) > 0; b = b[1:] {
|
||||
h ^= uint64(b[0]) * prime5
|
||||
h = rol11(h) * prime1
|
||||
}
|
||||
|
||||
|
22
vendor/github.com/klauspost/compress/zstd/seqdec.go
generated
vendored
22
vendor/github.com/klauspost/compress/zstd/seqdec.go
generated
vendored
@ -99,6 +99,21 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sequenceDecs) freeDecoders() {
|
||||
if f := s.litLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.litLengths.fse = nil
|
||||
}
|
||||
if f := s.offsets.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.offsets.fse = nil
|
||||
}
|
||||
if f := s.matchLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.matchLengths.fse = nil
|
||||
}
|
||||
}
|
||||
|
||||
// execute will execute the decoded sequence with the provided history.
|
||||
// The sequence must be evaluated before being sent.
|
||||
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
||||
@ -299,7 +314,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
|
||||
}
|
||||
size := ll + ml + len(out)
|
||||
if size-startSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
if size > cap(out) {
|
||||
// Not enough size, which can happen under high volume block streaming conditions
|
||||
@ -409,9 +424,8 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there is space for literals
|
||||
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
||||
if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
|
||||
// Add final literals
|
||||
|
42
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
generated
vendored
42
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
generated
vendored
@ -32,18 +32,22 @@ type decodeSyncAsmContext struct {
|
||||
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write beyond the output buffer.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write beyond the output buffer.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
@ -55,16 +59,22 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
|
||||
return false, nil
|
||||
}
|
||||
useSafe := false
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
|
||||
useSafe = true
|
||||
}
|
||||
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
|
||||
// FIXME: Using unsafe memory copies leads to rare, random crashes
|
||||
// with fuzz testing. It is therefore disabled for now.
|
||||
const useSafe = true
|
||||
/*
|
||||
useSafe := false
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
|
||||
useSafe = true
|
||||
}
|
||||
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
*/
|
||||
|
||||
br := s.br
|
||||
|
||||
@ -129,7 +139,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
if debugDecoder {
|
||||
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
|
||||
}
|
||||
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
|
||||
default:
|
||||
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
|
||||
@ -137,7 +147,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
|
||||
s.seqSize += ctx.litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
@ -195,20 +205,24 @@ const errorNotEnoughSpace = 5
|
||||
// sequenceDecs_decode_amd64 implements the main loop of sequenceDecs in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode_56_amd64 implements the main loop of sequenceDecs in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode_56_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
@ -275,7 +289,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
|
||||
s.seqSize += ctx.litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
@ -302,10 +316,12 @@ type executeAsmContext struct {
|
||||
// Returns false if a match offset is too big.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
|
||||
|
||||
// Same as above, but with safe memcopies
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
|
||||
|
||||
|
489
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
489
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
@ -1,7 +1,6 @@
|
||||
// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
|
||||
|
||||
//go:build !appengine && !noasm && gc && !noasm
|
||||
// +build !appengine,!noasm,gc,!noasm
|
||||
|
||||
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: CMOV
|
||||
@ -52,34 +51,46 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 16(R10)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_of_update_zero:
|
||||
MOVQ AX, 16(R10)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 8(R10)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_ml_update_zero:
|
||||
MOVQ AX, 8(R10)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -107,19 +118,25 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, (R10)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_ll_update_zero:
|
||||
MOVQ AX, (R10)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R14, (SP)
|
||||
@ -198,7 +215,7 @@ sequenceDecs_decode_amd64_skip_update:
|
||||
MOVQ R12, R13
|
||||
MOVQ R11, R12
|
||||
MOVQ CX, R11
|
||||
JMP sequenceDecs_decode_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ (R10), $0x00000000
|
||||
@ -210,7 +227,7 @@ sequenceDecs_decode_amd64_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero
|
||||
MOVQ R11, CX
|
||||
JMP sequenceDecs_decode_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -247,7 +264,7 @@ sequenceDecs_decode_amd64_adjust_temp_valid:
|
||||
MOVQ AX, R11
|
||||
MOVQ AX, CX
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_end:
|
||||
sequenceDecs_decode_amd64_after_adjust:
|
||||
MOVQ CX, 16(R10)
|
||||
|
||||
// Check values
|
||||
@ -303,10 +320,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: CMOV
|
||||
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
|
||||
@ -356,49 +369,67 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_56_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 16(R10)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_of_update_zero:
|
||||
MOVQ AX, 16(R10)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 8(R10)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_ml_update_zero:
|
||||
MOVQ AX, 8(R10)
|
||||
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, (R10)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_ll_update_zero:
|
||||
MOVQ AX, (R10)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R14, (SP)
|
||||
@ -477,7 +508,7 @@ sequenceDecs_decode_56_amd64_skip_update:
|
||||
MOVQ R12, R13
|
||||
MOVQ R11, R12
|
||||
MOVQ CX, R11
|
||||
JMP sequenceDecs_decode_56_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_56_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ (R10), $0x00000000
|
||||
@ -489,7 +520,7 @@ sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero
|
||||
MOVQ R11, CX
|
||||
JMP sequenceDecs_decode_56_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_56_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -526,7 +557,7 @@ sequenceDecs_decode_56_amd64_adjust_temp_valid:
|
||||
MOVQ AX, R11
|
||||
MOVQ AX, CX
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_end:
|
||||
sequenceDecs_decode_56_amd64_after_adjust:
|
||||
MOVQ CX, 16(R10)
|
||||
|
||||
// Check values
|
||||
@ -582,10 +613,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: BMI, BMI2, CMOV
|
||||
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
|
||||
@ -757,7 +784,7 @@ sequenceDecs_decode_bmi2_skip_update:
|
||||
MOVQ R11, R12
|
||||
MOVQ R10, R11
|
||||
MOVQ CX, R10
|
||||
JMP sequenceDecs_decode_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ (R9), $0x00000000
|
||||
@ -769,7 +796,7 @@ sequenceDecs_decode_bmi2_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero
|
||||
MOVQ R10, CX
|
||||
JMP sequenceDecs_decode_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -806,7 +833,7 @@ sequenceDecs_decode_bmi2_adjust_temp_valid:
|
||||
MOVQ R13, R10
|
||||
MOVQ R13, CX
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_end:
|
||||
sequenceDecs_decode_bmi2_after_adjust:
|
||||
MOVQ CX, 16(R9)
|
||||
|
||||
// Check values
|
||||
@ -862,10 +889,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: BMI, BMI2, CMOV
|
||||
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
|
||||
@ -1012,7 +1035,7 @@ sequenceDecs_decode_56_bmi2_skip_update:
|
||||
MOVQ R11, R12
|
||||
MOVQ R10, R11
|
||||
MOVQ CX, R10
|
||||
JMP sequenceDecs_decode_56_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_56_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ (R9), $0x00000000
|
||||
@ -1024,7 +1047,7 @@ sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
|
||||
MOVQ R10, CX
|
||||
JMP sequenceDecs_decode_56_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_56_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -1061,7 +1084,7 @@ sequenceDecs_decode_56_bmi2_adjust_temp_valid:
|
||||
MOVQ R13, R10
|
||||
MOVQ R13, CX
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_end:
|
||||
sequenceDecs_decode_56_bmi2_after_adjust:
|
||||
MOVQ CX, 16(R9)
|
||||
|
||||
// Check values
|
||||
@ -1117,10 +1140,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
|
||||
// Requires: SSE
|
||||
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
|
||||
@ -1354,8 +1373,7 @@ loop_finished:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1367,8 +1385,7 @@ error_match_off_too_big:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1712,8 +1729,7 @@ loop_finished:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1725,8 +1741,7 @@ error_match_off_too_big:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1749,6 +1764,10 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
|
||||
MOVQ 72(AX), DI
|
||||
MOVQ 80(AX), R8
|
||||
MOVQ 88(AX), R9
|
||||
XORQ CX, CX
|
||||
MOVQ CX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
MOVQ CX, 24(SP)
|
||||
MOVQ 112(AX), R10
|
||||
MOVQ 128(AX), CX
|
||||
MOVQ CX, 32(SP)
|
||||
@ -1798,34 +1817,46 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 8(SP)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_of_update_zero:
|
||||
MOVQ AX, 8(SP)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 16(SP)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_ml_update_zero:
|
||||
MOVQ AX, 16(SP)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -1853,19 +1884,25 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 24(SP)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_ll_update_zero:
|
||||
MOVQ AX, 24(SP)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R13, (SP)
|
||||
@ -1945,7 +1982,7 @@ sequenceDecs_decodeSync_amd64_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -1957,7 +1994,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
|
||||
MOVQ R13, AX
|
||||
@ -1966,8 +2003,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, AX
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(AX*8), R14
|
||||
ADDQ 144(CX)(AX*8), R14
|
||||
JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -1983,7 +2019,7 @@ sequenceDecs_decodeSync_amd64_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_end:
|
||||
sequenceDecs_decodeSync_amd64_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -2280,6 +2316,10 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
|
||||
MOVQ 72(CX), SI
|
||||
MOVQ 80(CX), DI
|
||||
MOVQ 88(CX), R8
|
||||
XORQ R9, R9
|
||||
MOVQ R9, 8(SP)
|
||||
MOVQ R9, 16(SP)
|
||||
MOVQ R9, 24(SP)
|
||||
MOVQ 112(CX), R9
|
||||
MOVQ 128(CX), R10
|
||||
MOVQ R10, 32(SP)
|
||||
@ -2452,7 +2492,7 @@ sequenceDecs_decodeSync_bmi2_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -2464,7 +2504,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
|
||||
MOVQ R13, R12
|
||||
@ -2473,8 +2513,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, R12
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(R12*8), R14
|
||||
ADDQ 144(CX)(R12*8), R14
|
||||
JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -2490,7 +2529,7 @@ sequenceDecs_decodeSync_bmi2_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_end:
|
||||
sequenceDecs_decodeSync_bmi2_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -2787,6 +2826,10 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
|
||||
MOVQ 72(AX), DI
|
||||
MOVQ 80(AX), R8
|
||||
MOVQ 88(AX), R9
|
||||
XORQ CX, CX
|
||||
MOVQ CX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
MOVQ CX, 24(SP)
|
||||
MOVQ 112(AX), R10
|
||||
MOVQ 128(AX), CX
|
||||
MOVQ CX, 32(SP)
|
||||
@ -2836,34 +2879,46 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 8(SP)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_of_update_zero:
|
||||
MOVQ AX, 8(SP)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 16(SP)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
|
||||
MOVQ AX, 16(SP)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -2891,19 +2946,25 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 24(SP)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
|
||||
MOVQ AX, 24(SP)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R13, (SP)
|
||||
@ -2983,7 +3044,7 @@ sequenceDecs_decodeSync_safe_amd64_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -2995,7 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
|
||||
MOVQ R13, AX
|
||||
@ -3004,8 +3065,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, AX
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(AX*8), R14
|
||||
ADDQ 144(CX)(AX*8), R14
|
||||
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -3021,7 +3081,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_end:
|
||||
sequenceDecs_decodeSync_safe_amd64_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -3420,6 +3480,10 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
|
||||
MOVQ 72(CX), SI
|
||||
MOVQ 80(CX), DI
|
||||
MOVQ 88(CX), R8
|
||||
XORQ R9, R9
|
||||
MOVQ R9, 8(SP)
|
||||
MOVQ R9, 16(SP)
|
||||
MOVQ R9, 24(SP)
|
||||
MOVQ 112(CX), R9
|
||||
MOVQ 128(CX), R10
|
||||
MOVQ R10, 32(SP)
|
||||
@ -3592,7 +3656,7 @@ sequenceDecs_decodeSync_safe_bmi2_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -3604,7 +3668,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
|
||||
MOVQ R13, R12
|
||||
@ -3613,8 +3677,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, R12
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(R12*8), R14
|
||||
ADDQ 144(CX)(R12*8), R14
|
||||
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -3630,7 +3693,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_end:
|
||||
sequenceDecs_decodeSync_safe_bmi2_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
|
4
vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
generated
vendored
4
vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
generated
vendored
@ -111,7 +111,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
}
|
||||
s.seqSize += ll + ml
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
litRemain -= ll
|
||||
if litRemain < 0 {
|
||||
@ -149,7 +149,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
}
|
||||
s.seqSize += litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
|
31
vendor/github.com/klauspost/compress/zstd/zstd.go
generated
vendored
31
vendor/github.com/klauspost/compress/zstd/zstd.go
generated
vendored
@ -36,9 +36,6 @@ const forcePreDef = false
|
||||
// zstdMinMatch is the minimum zstd match length.
|
||||
const zstdMinMatch = 3
|
||||
|
||||
// Reset the buffer offset when reaching this.
|
||||
const bufferReset = math.MaxInt32 - MaxWindowSize
|
||||
|
||||
// fcsUnknown is used for unknown frame content size.
|
||||
const fcsUnknown = math.MaxUint64
|
||||
|
||||
@ -75,7 +72,6 @@ var (
|
||||
ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
|
||||
|
||||
// ErrUnknownDictionary is returned if the dictionary ID is unknown.
|
||||
// For the time being dictionaries are not supported.
|
||||
ErrUnknownDictionary = errors.New("unknown dictionary")
|
||||
|
||||
// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
|
||||
@ -110,26 +106,25 @@ func printf(format string, a ...interface{}) {
|
||||
}
|
||||
}
|
||||
|
||||
// matchLen returns the maximum length.
|
||||
// matchLen returns the maximum common prefix length of a and b.
|
||||
// a must be the shortest of the two.
|
||||
// The function also returns whether all bytes matched.
|
||||
func matchLen(a, b []byte) int {
|
||||
b = b[:len(a)]
|
||||
for i := 0; i < len(a)-7; i += 8 {
|
||||
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
|
||||
return i + (bits.TrailingZeros64(diff) >> 3)
|
||||
func matchLen(a, b []byte) (n int) {
|
||||
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
|
||||
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
|
||||
if diff != 0 {
|
||||
return n + bits.TrailingZeros64(diff)>>3
|
||||
}
|
||||
n += 8
|
||||
}
|
||||
|
||||
checked := (len(a) >> 3) << 3
|
||||
a = a[checked:]
|
||||
b = b[checked:]
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return i + checked
|
||||
break
|
||||
}
|
||||
n++
|
||||
}
|
||||
return len(a) + checked
|
||||
return n
|
||||
|
||||
}
|
||||
|
||||
func load3232(b []byte, i int32) uint32 {
|
||||
@ -140,10 +135,6 @@ func load6432(b []byte, i int32) uint64 {
|
||||
return binary.LittleEndian.Uint64(b[i:])
|
||||
}
|
||||
|
||||
func load64(b []byte, i int) uint64 {
|
||||
return binary.LittleEndian.Uint64(b[i:])
|
||||
}
|
||||
|
||||
type byter interface {
|
||||
Bytes() []byte
|
||||
Len() int
|
||||
|
Reference in New Issue
Block a user