Initial

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
2025-06-25 00:03:23 +00:00 · 2019-12-20 12:56:11 +00:00
parent 9a30ff517f
commit 5c45242b3d
1379 changed files with 633020 additions and 0 deletions
--- a/vendor/golang.org/x/text/internal/triegen/compact.go
+++ b/vendor/golang.org/x/text/internal/triegen/compact.go
@ -0,0 +1,58 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package triegen
+
+// This file defines Compacter and its implementations.
+
+import "io"
+
+// A Compacter generates an alternative, more space-efficient way to store a
+// trie value block. A trie value block holds all possible values for the last
+// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
+// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
+//
+// For every new value block the builder asks each registered Compacter for
+// the size it would need and stores the block with the one reporting the
+// smallest size, falling back to a plain trie block otherwise.
+type Compacter interface {
+	// Size reports whether the Compacter can encode the given block (ok) and,
+	// if so, the size sz the encoded block would take. The builder compares sz
+	// against the byte size of an uncompacted block. len(v) is always 64.
+	Size(v []uint64) (sz int, ok bool)
+
+	// Store stores the block using the Compacter's compression method.
+	// It returns a handle with which the block can be retrieved.
+	// len(v) is always 64.
+	Store(v []uint64) uint32
+
+	// Print writes the data structures associated with the given store to w.
+	Print(w io.Writer) error
+
+	// Handler returns the name of a function that gets called during trie
+	// lookup for blocks generated by the Compacter. The function should be of
+	// the form func (n uint32, b byte) uint64, where n is the index returned by
+	// the Compacter's Store method and b is the last byte of the UTF-8
+	// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
+	// block.
+	Handler() string
+}
+
+// simpleCompacter is the fallback Compacter that a builder always has at
+// position 0 of its compactions. It shares the builder's memory layout and
+// stores blocks verbatim as ordinary trie value blocks.
+type simpleCompacter builder
+
+// Size reports the size of an uncompacted block: one value of ValueSize bytes
+// for each of the blockSize entries. It accepts every block.
+func (sc *simpleCompacter) Size([]uint64) (sz int, ok bool) {
+	return b2i(sc), true
+}
+
+// b2i returns the byte size of one plain value block for the builder behind sc.
+func b2i(sc *simpleCompacter) int {
+	return blockSize * sc.ValueSize
+}
+
+// Store appends v to the builder's value blocks and returns its handle, which
+// is the block's position reduced by blockOffset.
+func (sc *simpleCompacter) Store(v []uint64) uint32 {
+	position := len(sc.ValueBlocks)
+	sc.ValueBlocks = append(sc.ValueBlocks, v)
+	return uint32(position - blockOffset)
+}
+
+// Print writes nothing: the plain value blocks are emitted by the trie
+// template in print.go.
+func (sc *simpleCompacter) Print(io.Writer) error {
+	return nil
+}
+
+// Handler must never be called; the lookup expression for plain blocks is set
+// up explicitly by Gen instead.
+func (sc *simpleCompacter) Handler() string {
+	panic("Handler should be special-cased for this Compacter")
+}
--- a/vendor/golang.org/x/text/internal/triegen/print.go
+++ b/vendor/golang.org/x/text/internal/triegen/print.go
@ -0,0 +1,251 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package triegen
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"strings"
+	"text/template"
+)
+
+// print emits the complete generated source to w: the lookup methods for
+// []byte and for string input, the trie type together with its value and
+// index tables, and finally whatever each registered Compacter prints. Before
+// executing the templates it fills in the statistics and the block expressions
+// they refer to. The first error encountered is returned.
+func (b *builder) print(w io.Writer) error {
+	nValues := len(b.ValueBlocks) * blockSize
+	nIndexes := len(b.IndexBlocks) * blockSize
+
+	b.Stats.NValueEntries = nValues
+	b.Stats.NValueBytes = nValues * b.ValueSize
+	b.Stats.NIndexEntries = nIndexes
+	b.Stats.NIndexBytes = nIndexes * b.IndexSize
+	b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
+
+	// With several roots the generated code needs explicit starter indexes
+	// stored in the trie structure. With exactly one root all starter blocks
+	// are at position 0, so the generated arrays are accessed directly.
+	b.ASCIIBlock, b.StarterBlock = "t.ascii", "t.utf8Start"
+	if len(b.Trie) == 1 {
+		b.ASCIIBlock, b.StarterBlock = b.Name+"Values", b.Name+"Index"
+	}
+
+	// The lookup methods are generated twice, once per input type.
+	for _, src := range []string{"[]byte", "string"} {
+		b.SourceType = src
+		if err := lookupGen.Execute(w, b); err != nil {
+			return err
+		}
+	}
+
+	if err := trieGen.Execute(w, b); err != nil {
+		return err
+	}
+
+	for i := range b.Compactions {
+		if err := b.Compactions[i].c.Print(w); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// printValues formats value block number n as the body of a keyed array
+// literal. It writes a header comment with the block number and offset and
+// then every non-zero entry as "index:value". Entries are grouped by rows of
+// six positions; a line break is only written before the first non-zero entry
+// of a row, so all-zero rows produce no output.
+func printValues(n int, values []uint64) string {
+	var out bytes.Buffer
+	base := n * blockSize
+	fmt.Fprintf(&out, "\t// Block %#x, offset %#x", n, base)
+
+	lineBreakDue := false
+	for i, v := range values {
+		if i%6 == 0 {
+			lineBreakDue = true
+		}
+		if v == 0 {
+			continue
+		}
+		if lineBreakDue {
+			out.WriteByte('\n')
+			lineBreakDue = false
+		}
+		fmt.Fprintf(&out, "\t%#02x:%#04x, ", base+i, v)
+	}
+	return out.String()
+}
+
+// printIndex formats index block number nr, backed by node n, as the body of
+// a keyed array literal. Each child is written as "index:reference", where
+// the reference is the child's index within its compaction plus that
+// compaction's Offset. Missing children and zero references are omitted.
+// Entries are grouped by rows of eight positions; a line break is only written
+// before the first emitted entry of a row.
+func printIndex(b *builder, nr int, n *node) string {
+	var out bytes.Buffer
+	base := nr * blockSize
+	fmt.Fprintf(&out, "\t// Block %#x, offset %#x", nr, base)
+
+	lineBreakDue := false
+	for i, child := range n.children {
+		if i%8 == 0 {
+			lineBreakDue = true
+		}
+		if child == nil {
+			continue
+		}
+		ref := b.Compactions[child.index.compaction].Offset + uint32(child.index.index)
+		if ref == 0 {
+			continue
+		}
+		if lineBreakDue {
+			out.WriteByte('\n')
+			lineBreakDue = false
+		}
+		fmt.Fprintf(&out, "\t%#02x:%#02x, ", base+i, ref)
+	}
+	return out.String()
+}
+
+// Templates used by (*builder).print. Both are parsed when the package is
+// initialized; template.Must panics if a template fails to parse.
+var (
+	// trieGen generates the trie type, its constructor, lookupValue and the
+	// value and index tables from trieTemplate. The registered helper
+	// functions are available inside that template.
+	trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
+		"printValues": printValues,
+		"printIndex":  printIndex,
+		"title":       strings.Title,
+		"dec":         func(x int) int { return x - 1 },
+		// psize renders a byte count both in bytes and in KiB.
+		"psize": func(n int) string {
+			return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
+		},
+	}).Parse(trieTemplate))
+	// lookupGen generates the lookup methods from lookupTemplate. It is
+	// executed once per source type.
+	lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
+)
+
+// trieTemplate is the template source for the generated trie type, its
+// constructor, the lookupValue method that dispatches on the compaction a
+// block belongs to, and the value and index tables. For multi-root tries it
+// additionally emits the handle type and the table of per-trie handles.
+//
+// TODO: consider the return type of lookup. It could be uint64, even if the
+// internal value type is smaller. We will have to verify this with the
+// performance of unicode/norm, which is very sensitive to such changes.
+const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
+// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
+type {{.Name}}Trie struct { {{if $multi}}
+	ascii []{{.ValueType}} // index for ASCII bytes
+	utf8Start  []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
+{{end}}}
+
+func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
+	h := {{.Name}}TrieHandles[i]
+	return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
+}
+
+type {{.Name}}TrieHandle struct {
+	ascii, multi {{.IndexType}}
+}
+
+// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
+var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
+{{range .Trie}}	{ {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
+{{end}}}{{else}}
+	return &{{.Name}}Trie{}
+}
+{{end}}
+// lookupValue determines the type of block n and looks up the value for b.
+func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
+	switch { {{range $i, $c := .Compactions}}
+		{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
+			n -= {{$c.Offset}}{{end}}
+			return {{print $b.ValueType}}({{$c.Handler}}){{end}}
+	}
+}
+
+// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
+// The third block is the zero block.
+var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
+{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
+{{end}}}
+
+// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
+// Block 0 is the zero block.
+var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
+{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
+{{end}}}
+`
+
+// lookupTemplate is the template source for the generated lookup methods. It
+// produces a checked lookup, which validates the UTF-8 encoding and returns
+// the consumed width, and an unchecked "Unsafe" variant. When SourceType is
+// "string" the generated method names carry a "String" infix.
+//
+// TODO: consider allowing zero-length strings after evaluating performance with
+// unicode/norm.
+const lookupTemplate = `
+// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
+// the width in bytes of this encoding. The size will be 0 if s does not
+// hold enough bytes to complete the encoding. len(s) must be greater than 0.
+func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
+	c0 := s[0]
+	switch {
+	case c0 < 0x80: // is ASCII
+		return {{.ASCIIBlock}}[c0], 1
+	case c0 < 0xC2:
+		return 0, 1  // Illegal UTF-8: not a starter, not ASCII.
+	case c0 < 0xE0: // 2-byte UTF-8
+		if len(s) < 2 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c1), 2
+	case c0 < 0xF0: // 3-byte UTF-8
+		if len(s) < 3 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = {{.Name}}Index[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return 0, 2 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c2), 3
+	case c0 < 0xF8: // 4-byte UTF-8
+		if len(s) < 4 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = {{.Name}}Index[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return 0, 2 // Illegal UTF-8: not a continuation byte.
+		}
+		o = uint32(i)<<6 + uint32(c2)
+		i = {{.Name}}Index[o]
+		c3 := s[3]
+		if c3 < 0x80 || 0xC0 <= c3 {
+			return 0, 3 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c3), 4
+	}
+	// Illegal rune
+	return 0, 1
+}
+
+// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
+// s must start with a full and valid UTF-8 encoded rune.
+func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
+	c0 := s[0]
+	if c0 < 0x80 { // is ASCII
+		return {{.ASCIIBlock}}[c0]
+	}
+	i := {{.StarterBlock}}[c0]
+	if c0 < 0xE0 { // 2-byte UTF-8
+		return t.lookupValue(uint32(i), s[1])
+	}
+	i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
+	if c0 < 0xF0 { // 3-byte UTF-8
+		return t.lookupValue(uint32(i), s[2])
+	}
+	i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
+	if c0 < 0xF8 { // 4-byte UTF-8
+		return t.lookupValue(uint32(i), s[3])
+	}
+	return 0
+}
+`
--- a/vendor/golang.org/x/text/internal/triegen/triegen.go
+++ b/vendor/golang.org/x/text/internal/triegen/triegen.go
@ -0,0 +1,494 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package triegen implements a code generator for a trie for associating
+// unsigned integer values with UTF-8 encoded runes.
+//
+// Many of the go.text packages use tries for storing per-rune information.  A
+// trie is especially useful if many of the runes have the same value. If this
+// is the case, many blocks can be expected to be shared allowing for
+// information on many runes to be stored in little space.
+//
+// As most of the lookups are done directly on []byte slices, the tries use the
+// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
+// runes and contributes a little bit to better performance. It also naturally
+// provides a fast path for ASCII.
+//
+// Space is also an issue. There are many code points defined in Unicode and as
+// a result tables can get quite large. So every byte counts. The triegen
+// package automatically chooses the smallest integer values to represent the
+// tables. Compacters allow further compression of the trie by allowing for
+// alternative representations of individual trie blocks.
+//
+// triegen allows generating multiple tries as a single structure. This is
+// useful when, for example, one wants to generate tries for several languages
+// that have a lot of values in common. Some existing libraries for
+// internationalization store all per-language data as a dynamically loadable
+// chunk. The go.text packages are designed with the assumption that the user
+// typically wants to compile in support for all supported languages, in line
+// with the approach common to Go to create a single standalone binary. The
+// multi-root trie approach can give significant storage savings in this
+// scenario.
+//
+// triegen generates both tables and code. The code is optimized to use the
+// automatically chosen data types. The following code is generated for a Trie
+// or multiple Tries named "foo":
+//	- type fooTrie
+//		The trie type.
+//
+//	- func newFooTrie(x int) *fooTrie
+//		Trie constructor, where x is the index of the trie passed to Gen.
+//
+//	- func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
+//		The lookup method, where uintX is automatically chosen.
+//
+//	- func lookupString, lookupUnsafe and lookupStringUnsafe
+//		Variants of the above.
+//
+//	- var fooValues and fooIndex and any tables generated by Compacters.
+//		The core trie data.
+//
+//	- var fooTrieHandles
+//		Indexes of starter blocks in case of multiple trie roots.
+//
+// It is recommended that users test the generated trie by checking the returned
+// value for every rune. Such exhaustive tests are possible as the number of
+// runes in Unicode is limited.
+package triegen // import "golang.org/x/text/internal/triegen"
+
+// TODO: Arguably, the internally optimized data types would not have to be
+// exposed in the generated API. We could also investigate not generating the
+// code, but using it through a package. We would have to investigate the impact
+// on performance of making such change, though. For packages like unicode/norm,
+// small changes like this could tank performance.
+
+import (
+	"encoding/binary"
+	"fmt"
+	"hash/crc64"
+	"io"
+	"log"
+	"unicode/utf8"
+)
+
+// builder builds a set of tries for associating values with runes. The set of
+// tries can share common index and value blocks. Its exported fields are read
+// by the code generation templates.
+type builder struct {
+	// Name is the prefix of the generated identifiers, such as the trie type
+	// and the value and index tables.
+	Name string
+
+	// ValueType is the type of the trie values looked up.
+	ValueType string
+
+	// ValueSize is the byte size of the ValueType.
+	ValueSize int
+
+	// IndexType is the type of trie index values used for all UTF-8 bytes of
+	// a rune except the last one.
+	IndexType string
+
+	// IndexSize is the byte size of the IndexType.
+	IndexSize int
+
+	// SourceType is used when generating the lookup functions. If the user
+	// requests StringSupport, all lookup functions will be generated for
+	// string input as well.
+	SourceType string
+
+	// Trie holds the trie roots for which code is generated.
+	Trie []*Trie
+
+	// IndexBlocks holds the nodes emitted as index blocks, ValueBlocks the
+	// uncompacted value blocks. Compactions[0] is always the plain
+	// simpleCompacter; further entries are added through Compact. Checksum is
+	// the sum of the checksums of all tries.
+	IndexBlocks []*node
+	ValueBlocks [][]uint64
+	Compactions []compaction
+	Checksum    uint64
+
+	// ASCIIBlock and StarterBlock are the expressions the generated lookup
+	// code indexes for ASCII bytes and for UTF-8 starter bytes, respectively.
+	ASCIIBlock   string
+	StarterBlock string
+
+	// These maps key already allocated blocks by hash, so that identical
+	// blocks are shared: index blocks, value blocks and ASCII value blocks.
+	indexBlockIdx map[uint64]int
+	valueBlockIdx map[uint64]nodeIndex
+	asciiBlockIdx map[uint64]int
+
+	// Stats are used to fill out the template.
+	Stats struct {
+		NValueEntries int
+		NValueBytes   int
+		NIndexEntries int
+		NIndexBytes   int
+		NHandleBytes  int
+	}
+
+	// err is the first error recorded through setError.
+	err error
+}
+
+// A nodeIndex encodes the index of a node, which is defined by the compaction
+// which stores it and an index within the compaction. For internal nodes, the
+// compaction is always 0.
+type nodeIndex struct {
+	compaction int // position in builder.Compactions
+	index      int // index block position or handle within the compaction
+}
+
+// compaction keeps track of stats used for the compaction.
+type compaction struct {
+	c         Compacter // the Compacter storing the blocks
+	blocks    []*node   // NOTE(review): not referenced in the code shown here
+	maxHandle uint32    // largest handle returned by c.Store so far
+	totalSize int       // sum of the sizes of all blocks stored through c
+
+	// Used by template-based generator and thus exported.
+	// Offset is the first reference value assigned to this compaction and
+	// Cutoff the first one past it; both are computed by (*builder).build.
+	// Handler is the expression the generated lookupValue evaluates for
+	// blocks of this compaction.
+	Cutoff  uint32
+	Offset  uint32
+	Handler string
+}
+
+// setError records err as the builder's error unless an earlier error has
+// already been recorded; only the first error is kept.
+func (b *builder) setError(err error) {
+	if b.err != nil {
+		return
+	}
+	b.err = err
+}
+
+// An Option can be passed to Gen to configure the builder before the tries
+// are built. Gen applies options in the order given and returns the first
+// non-nil error an option reports.
+type Option func(b *builder) error
+
+// Compact returns an Option that registers c as an additional Compacter. The
+// lookup expression for its blocks is a call of the function named by
+// c.Handler with the arguments (n, b).
+func Compact(c Compacter) Option {
+	return func(b *builder) error {
+		entry := compaction{c: c}
+		entry.Handler = c.Handler() + "(n, b)"
+		b.Compactions = append(b.Compactions, entry)
+		return nil
+	}
+}
+
+// Gen generates Go code for a lookup structure shared by the given tries and
+// writes it to w. The generated trie type is called nameTrie, and
+// newNameTrie(x) returns the *nameTrie for tries[x]; values are looked up
+// through the lookup methods defined on nameTrie. Options are applied in
+// order before building. Gen returns the table size of the generated trie, or
+// 0 and the error if an option, the build or the output fails.
+func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
+	// Compaction 0 stores plain value blocks, which are looked up directly in
+	// the generated values table.
+	plain := compaction{Handler: name + "Values[n<<6+uint32(b)]"}
+
+	b := &builder{
+		Name: name,
+		Trie: tries,
+		// The index contains two dummy blocks, followed by the zero block.
+		// The zero block is at offset 0x80, so that the offset for the zero
+		// block for continuation bytes is 0.
+		IndexBlocks: []*node{new(node), new(node), new(node)},
+		Compactions: []compaction{plain},
+		// Key 0 in indexBlockIdx and valueBlockIdx is the hash of the zero
+		// block.
+		indexBlockIdx: map[uint64]int{0: 0},
+		valueBlockIdx: map[uint64]nodeIndex{0: {}},
+		asciiBlockIdx: map[uint64]int{},
+	}
+	b.Compactions[0].c = (*simpleCompacter)(b)
+
+	for _, opt := range opts {
+		if err = opt(b); err != nil {
+			return 0, err
+		}
+	}
+
+	b.build()
+	if b.err != nil {
+		return 0, b.err
+	}
+
+	if err = b.print(w); err != nil {
+		return 0, err
+	}
+	return b.Size(), nil
+}
+
+// A Trie represents a single root node of a trie. A builder may build several
+// overlapping tries at once.
+type Trie struct {
+	// root holds the ASCII values and the children for the non-ASCII starter
+	// bytes.
+	root *node
+
+	// hiddenTrie is embedded so that its fields are promoted and thereby
+	// reachable from the code generation templates.
+	hiddenTrie
+}
+
+// hiddenTrie contains values we want to be visible to the template generator,
+// but hidden from the API documentation.
+type hiddenTrie struct {
+	Name         string // name passed to NewTrie
+	Checksum     uint64 // hash of the trie contents, set by buildTrie
+	ASCIIIndex   int    // position of the ASCII blocks in the value blocks
+	StarterIndex int    // position of the starter block in the index blocks
+}
+
+// NewTrie returns a new, empty trie root with the given name. The root node
+// is created with blockSize child slots and utf8.RuneSelf value slots, one
+// for each ASCII byte.
+func NewTrie(name string) *Trie {
+	root := &node{
+		children: make([]*node, blockSize),
+		values:   make([]uint64, utf8.RuneSelf),
+	}
+	return &Trie{
+		root:       root,
+		hiddenTrie: hiddenTrie{Name: name},
+	}
+}
+
+// Gen generates code for t alone by calling the package-level Gen with t as
+// the only trie and the name that was passed to NewTrie. It returns the size
+// of the generated tables.
+func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
+	single := []*Trie{t}
+	return Gen(w, t.Name, single, opts...)
+}
+
+// node is a node of the intermediate trie structure.
+type node struct {
+	// children holds this node's children. It is always of length 64.
+	// A child node may be nil.
+	children []*node
+
+	// values contains the values of this node. If it is non-nil, this node is
+	// either a root or leaf node:
+	// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
+	// For leaf nodes, len(values) ==  64 and it maps the bytes in [0x80, 0xBF].
+	values []uint64
+
+	// index identifies the block generated for this node. It is assigned by
+	// (*builder).computeOffsets.
+	index nodeIndex
+}
+
+// Insert associates value with the given rune. A zero value is ignored, as
+// zero is what a lookup yields for runes that were never inserted. Insert
+// panics if a non-zero value is passed for an invalid rune, and terminates the
+// program through log.Fatalf if the trie structure turns out inconsistent (an
+// internal node carrying values, or a leaf node having children).
+func (t *Trie) Insert(r rune, value uint64) {
+	if value == 0 {
+		// Note: The UCD tables will always assign what amounts to a zero value
+		// to a surrogate. Allowing a zero value for an illegal rune allows
+		// users to iterate over [0..MaxRune] without having to explicitly
+		// exclude surrogates, which would be tedious.
+		return
+	}
+
+	// Converting an invalid rune yields the encoding of the replacement
+	// character, which then does not decode back to r.
+	enc := string(r)
+	if decoded, _ := utf8.DecodeRuneInString(enc); decoded != r {
+		panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
+	}
+
+	last := len(enc) - 1
+	if last == 0 {
+		// It is a root node value (ASCII).
+		t.root.values[enc[0]] = value
+		return
+	}
+
+	// Walk down one level for every byte but the last, creating the nodes that
+	// are still missing.
+	cur := t.root
+	for i := 0; i < last; i++ {
+		if cur.children == nil {
+			cur.children = make([]*node, blockSize)
+		}
+		slot := enc[i] % blockSize
+		child := cur.children[slot]
+		if child == nil {
+			child = &node{}
+			cur.children[slot] = child
+		}
+		// Only the node reached by the second to last byte may hold values.
+		if i < last-1 && child.values != nil {
+			log.Fatalf("triegen: insert(%U): found internal node with values", r)
+		}
+		cur = child
+	}
+
+	if cur.values == nil {
+		cur.values = make([]uint64, blockSize)
+	}
+	if cur.children != nil {
+		log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
+	}
+	// The last byte is a continuation byte; 0x80 maps to slot 0.
+	cur.values[enc[last]-0x80] = value
+}
+
+// Size returns the number of bytes the generated trie will take to store: the
+// index blocks, the plain value blocks, the blocks stored by additional
+// Compacters and, for multi-root tries, the handles. It is exported because
+// the templates use it.
+func (b *builder) Size() int {
+	indexBytes := len(b.IndexBlocks) * blockSize * b.IndexSize
+
+	// The plain value blocks are counted from ValueBlocks rather than from the
+	// first compaction, as the totalSize of that compaction does not account
+	// for the ASCII blocks, which are managed separately.
+	valueBytes := len(b.ValueBlocks) * blockSize * b.ValueSize
+
+	total := indexBytes + valueBytes
+	for _, extra := range b.Compactions[1:] {
+		total += extra.totalSize
+	}
+
+	// TODO: this computation does not account for the fixed overhead of using
+	// a compaction, either code or data. As for data, though, the typical
+	// overhead of data is in the order of bytes (2 bytes for cases). Further,
+	// the savings of using a compaction should anyway be substantial for it to
+	// be worth it.
+
+	// Multi-root tries additionally store two index values per trie.
+	if roots := len(b.Trie); roots > 1 {
+		total += 2 * b.IndexSize * roots
+	}
+	return total
+}
+
+// build computes everything print needs: the value type, the block
+// allocation of every trie, the reference range of each compaction and
+// finally the index type.
+func (b *builder) build() {
+	// The value type is the smallest one that fits the largest value of any
+	// trie.
+	var largest uint64
+	for _, t := range b.Trie {
+		largest = maxValue(t.root, largest)
+	}
+	b.ValueType, b.ValueSize = getIntType(largest)
+
+	// Allocate all blocks.
+	// TODO: first compute the ASCII blocks for all tries and then the other
+	// nodes. ASCII blocks are more restricted in placement, as they require two
+	// blocks to be placed consecutively. Processing them first may improve
+	// sharing (at least one zero block can be expected to be saved.)
+	for _, t := range b.Trie {
+		b.Checksum += b.buildTrie(t)
+	}
+
+	// Give every compaction a consecutive range [Offset, Cutoff) that is large
+	// enough for all handles it returned.
+	var next uint32
+	for i := range b.Compactions {
+		c := &b.Compactions[i]
+		c.Offset = next
+		next += c.maxHandle + 1
+		c.Cutoff = next
+	}
+
+	// The index type must fit both the end of the last range and every index
+	// block position.
+	// TODO: different byte positions could have different sizes. So far we have
+	// not found a case where this is beneficial.
+	limit := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
+	for _, blk := range b.IndexBlocks {
+		if pos := uint64(blk.index.index); limit < pos {
+			limit = pos
+		}
+	}
+	b.IndexType, b.IndexSize = getIntType(limit)
+}
+
+// maxValue returns the largest of max and all values stored in n or in any of
+// its descendants. A nil node leaves max unchanged.
+func maxValue(n *node, max uint64) uint64 {
+	if n != nil {
+		for _, child := range n.children {
+			max = maxValue(child, max)
+		}
+		for _, v := range n.values {
+			if v > max {
+				max = v
+			}
+		}
+	}
+	return max
+}
+
+// getIntType returns the name and the byte size of the smallest unsigned
+// integer type that can represent v.
+func getIntType(v uint64) (string, int) {
+	if v <= 0xFF {
+		return "uint8", 1
+	}
+	if v <= 0xFFFF {
+		return "uint16", 2
+	}
+	if v <= 0xFFFFFFFF {
+		return "uint32", 4
+	}
+	return "uint64", 8
+}
+
+const (
+	// blockSize is the number of entries in a trie block, one for each
+	// possible continuation byte.
+	blockSize = 64
+
+	// Subtract two blocks to offset 0x80, the first continuation byte.
+	blockOffset = 2
+
+	// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
+	rootBlockOffset = 3
+)
+
+// crcTable is the CRC-64 table (ISO polynomial) used to hash blocks so that
+// identical blocks can be detected and shared.
+var crcTable = crc64.MakeTable(crc64.ISO)
+
+// buildTrie allocates the blocks for trie t: it places the two ASCII value
+// blocks of the root, sharing them with an earlier trie that has identical
+// ASCII values, and then computes the offsets of all other nodes. It stores
+// the resulting ASCIIIndex, StarterIndex and Checksum in t and returns the
+// checksum.
+func (b *builder) buildTrie(t *Trie) uint64 {
+	root := t.root
+
+	// ASCII blocks are keyed by the hash of the root values. For the first
+	// trie, the ASCII block will be at position 0.
+	asciiHash := crc64.New(crcTable)
+	binary.Write(asciiHash, binary.BigEndian, root.values)
+	key := asciiHash.Sum64()
+
+	pos, shared := b.asciiBlockIdx[key]
+	if !shared {
+		pos = len(b.ValueBlocks)
+		b.asciiBlockIdx[key] = pos
+
+		lower, upper := root.values[:blockSize], root.values[blockSize:]
+		b.ValueBlocks = append(b.ValueBlocks, lower, upper)
+		if pos == 0 {
+			// Add the zero block at position 2 so that it will be assigned a
+			// zero reference in the lookup blocks.
+			// TODO: always do this? This would allow us to remove a check from
+			// the trie lookup, but at the expense of extra space. Analyze
+			// performance for unicode/norm.
+			b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
+		}
+	}
+	t.ASCIIIndex = pos
+
+	// Compute remaining offsets.
+	sum := b.computeOffsets(root, true)
+	t.Checksum = sum
+
+	// The index already has the normal blockOffset subtracted. Subtract the
+	// difference for starter bytes.
+	t.StarterIndex = root.index.index - (rootBlockOffset - blockOffset)
+	return sum
+}
+
+// computeOffsets assigns an index to n and, recursively, to all of its
+// descendants, and returns the hash of the subtree rooted at n. The hash is 0
+// for a node that has neither children nor values, which directs it to the
+// zero block. Nodes with children become index blocks; all other nodes become
+// value blocks stored through the Compacter that reports the smallest size.
+// Blocks with an already known hash are shared instead of being stored again.
+// The root argument is not consulted by the current implementation.
+func (b *builder) computeOffsets(n *node, root bool) uint64 {
+	// For the first trie, the root lookup block will be at position 3, which is
+	// the offset for UTF-8 non-ASCII starter bytes.
+	// IndexBlocks still has its initial length only on the very first call.
+	first := len(b.IndexBlocks) == rootBlockOffset
+	if first {
+		b.IndexBlocks = append(b.IndexBlocks, n)
+	}
+
+	// We special-case the cases where all values recursively are 0. This allows
+	// for the use of a zero block to which all such values can be directed.
+	hash := uint64(0)
+	if n.children != nil || n.values != nil {
+		hasher := crc64.New(crcTable)
+		// The hash covers the hashes of all children, with 0 standing in for a
+		// missing child, followed by the node's own values.
+		for _, c := range n.children {
+			var v uint64
+			if c != nil {
+				v = b.computeOffsets(c, false)
+			}
+			binary.Write(hasher, binary.BigEndian, v)
+		}
+		binary.Write(hasher, binary.BigEndian, n.values)
+		hash = hasher.Sum64()
+	}
+
+	if first {
+		// Register the block appended above under its position relative to
+		// blockOffset, so that the lookup below finds it.
+		b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
+	}
+
+	// Compacters don't apply to internal nodes.
+	if n.children != nil {
+		v, ok := b.indexBlockIdx[hash]
+		if !ok {
+			v = len(b.IndexBlocks) - blockOffset
+			b.IndexBlocks = append(b.IndexBlocks, n)
+			b.indexBlockIdx[hash] = v
+		}
+		n.index = nodeIndex{0, v}
+	} else {
+		h, ok := b.valueBlockIdx[hash]
+		if !ok {
+			// Start from the size of a plain block (compaction 0) and switch
+			// to another Compacter only if it is strictly smaller.
+			bestI, bestSize := 0, blockSize*b.ValueSize
+			for i, c := range b.Compactions[1:] {
+				if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
+					bestI, bestSize = i+1, sz
+				}
+			}
+			c := &b.Compactions[bestI]
+			c.totalSize += bestSize
+			v := c.c.Store(n.values)
+			// Track the largest handle; build uses it to size the reference
+			// range of this compaction.
+			if c.maxHandle < v {
+				c.maxHandle = v
+			}
+			h = nodeIndex{bestI, int(v)}
+			b.valueBlockIdx[hash] = h
+		}
+		n.index = h
+	}
+	return hash
+}