Mirror of https://github.com/openfaas/faasd.git (synced 2025-06-23 23:33:23 +00:00)

Commit: Initial
Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
vendor/golang.org/x/text/internal/colltab/collelem.go (generated, vendored, new file, +371 lines)
@@ -0,0 +1,371 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import (
    "fmt"
    "unicode"
)

// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int

const (
    Primary Level = iota
    Secondary
    Tertiary
    Quaternary
    Identity

    NumLevels
)

const (
    defaultSecondary = 0x20
    defaultTertiary  = 0x2
    maxTertiary      = 0x1F
    MaxQuaternary    = 0x1FFFFF // 21 bits.
)

// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32

const (
    maxCE       Elem = 0xAFFFFFFF
    PrivateUse       = minContract
    minContract      = 0xC0000000
    maxContract      = 0xDFFFFFFF
    minExpand        = 0xE0000000
    maxExpand        = 0xEFFFFFFF
    minDecomp        = 0xF0000000
)

type ceType int

const (
    ceNormal           ceType = iota // ceNormal includes implicits (ce == 0)
    ceContractionIndex               // rune can be a start of a contraction
    ceExpansionIndex                 // rune expands into a sequence of collation elements
    ceDecompose                      // rune expands using NFKC decomposition
)

func (ce Elem) ctype() ceType {
    if ce <= maxCE {
        return ceNormal
    }
    if ce <= maxContract {
        return ceContractionIndex
    }
    if ce <= maxExpand {
        return ceExpansionIndex
    }
    return ceDecompose
}

// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
//   - p* is the primary collation value
//   - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
//   - p* is the primary collation value
//   - s* is the offset of the secondary from its default value.
//   - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
//   - t* is the tertiary collation value
//   - c* is the canonical combining class
//   - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 1010cccc ccccssss ssssssss tttttttt, where
//   - c* is the canonical combining class
//   - s* is the secondary collation value
//   - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
//   - q* is the quaternary value
const (
    ceTypeMask              = 0xC0000000
    ceTypeMaskExt           = 0xE0000000
    ceIgnoreMask            = 0xF00FFFFF
    ceType1                 = 0x40000000
    ceType2                 = 0x00000000
    ceType3or4              = 0x80000000
    ceType4                 = 0xA0000000
    ceTypeQ                 = 0xC0000000
    Ignore                  = ceType4
    firstNonPrimary         = 0x80000000
    lastSpecialPrimary      = 0xA0000000
    secondaryMask           = 0x80000000
    hasTertiaryMask         = 0x40000000
    primaryValueMask        = 0x3FFFFE00
    maxPrimaryBits          = 21
    compactPrimaryBits      = 16
    maxSecondaryBits        = 12
    maxTertiaryBits         = 8
    maxCCCBits              = 8
    maxSecondaryCompactBits = 8
    maxSecondaryDiffBits    = 4
    maxTertiaryCompactBits  = 5
    primaryShift            = 9
    compactSecondaryShift   = 5
    minCompactSecondary     = defaultSecondary - 4
)

func makeImplicitCE(primary int) Elem {
    return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
}

// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
    if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
        return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
    }
    if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
        return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
    }
    if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
        return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
    }
    ce := Elem(0)
    if primary != 0 {
        if ccc != 0 {
            if primary >= 1<<compactPrimaryBits {
                return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
            }
            if secondary != defaultSecondary {
                return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
            }
            ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
            ce |= Elem(ccc) << compactPrimaryBits
            ce |= Elem(primary)
            ce |= ceType3or4
        } else if tertiary == defaultTertiary {
            if secondary >= 1<<maxSecondaryCompactBits {
                return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
            }
            ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
            ce |= ceType1
        } else {
            d := secondary - defaultSecondary + maxSecondaryDiffBits
            if d >= 1<<maxSecondaryDiffBits || d < 0 {
                return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
            }
            if tertiary >= 1<<maxTertiaryCompactBits {
                return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
            }
            ce = Elem(primary<<maxSecondaryDiffBits + d)
            ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
        }
    } else {
        ce = Elem(secondary<<maxTertiaryBits + tertiary)
        ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
        ce |= ceType4
    }
    return ce, nil
}

// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
    return ceTypeQ | Elem(v<<primaryShift)
}

// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
func (ce Elem) Mask(l Level) uint32 {
    return 0
}

// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
    if ce&ceType3or4 != 0 {
        if ce&ceType4 == ceType3or4 {
            return uint8(ce >> 16)
        }
        return uint8(ce >> 20)
    }
    return 0
}

// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
    if ce >= firstNonPrimary {
        if ce > lastSpecialPrimary {
            return 0
        }
        return int(uint16(ce))
    }
    return int(ce&primaryValueMask) >> primaryShift
}

// Secondary returns the secondary collation weight for ce.
func (ce Elem) Secondary() int {
    switch ce & ceTypeMask {
    case ceType1:
        return int(uint8(ce))
    case ceType2:
        return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
    case ceType3or4:
        if ce < ceType4 {
            return defaultSecondary
        }
        return int(ce>>8) & 0xFFF
    case ceTypeQ:
        return 0
    }
    panic("should not reach here")
}

// Tertiary returns the tertiary collation weight for ce.
func (ce Elem) Tertiary() uint8 {
    if ce&hasTertiaryMask == 0 {
        if ce&ceType3or4 == 0 {
            return uint8(ce & 0x1F)
        }
        if ce&ceType4 == ceType4 {
            return uint8(ce)
        }
        return uint8(ce>>24) & 0x1F // type 2
    } else if ce&ceTypeMask == ceType1 {
        return defaultTertiary
    }
    // ce is a quaternary value.
    return 0
}

func (ce Elem) updateTertiary(t uint8) Elem {
    if ce&ceTypeMask == ceType1 {
        // convert to type 4
        nce := ce & primaryValueMask
        nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
        ce = nce
    } else if ce&ceTypeMaskExt == ceType3or4 {
        ce &= ^Elem(maxTertiary << 24)
        return ce | (Elem(t) << 24)
    } else {
        // type 2 or 4
        ce &= ^Elem(maxTertiary)
    }
    return ce | Elem(t)
}

// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == Ignore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int {
    if ce&ceTypeMask == ceTypeQ {
        return int(ce&primaryValueMask) >> primaryShift
    } else if ce&ceIgnoreMask == Ignore {
        return 0
    }
    return MaxQuaternary
}

// Weight returns the collation weight for the given level.
func (ce Elem) Weight(l Level) int {
    switch l {
    case Primary:
        return ce.Primary()
    case Secondary:
        return ce.Secondary()
    case Tertiary:
        return int(ce.Tertiary())
    case Quaternary:
        return ce.Quaternary()
    }
    return 0 // return 0 (ignore) for undefined levels.
}

// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
//   - n* is the size of the first node in the contraction trie.
//   - i* is the index of the first node in the contraction trie.
//   - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
    maxNBits              = 4
    maxTrieIndexBits      = 12
    maxContractOffsetBits = 13
)

func splitContractIndex(ce Elem) (index, n, offset int) {
    n = int(ce & (1<<maxNBits - 1))
    ce >>= maxNBits
    index = int(ce & (1<<maxTrieIndexBits - 1))
    ce >>= maxTrieIndexBits
    offset = int(ce & (1<<maxContractOffsetBits - 1))
    return
}

// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16

func splitExpandIndex(ce Elem) (index int) {
    return int(uint16(ce))
}

// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
//   - v* is the replacement tertiary weight for the first rune,
//   - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
func splitDecompose(ce Elem) (t1, t2 uint8) {
    return uint8(ce), uint8(ce >> 8)
}

const (
    // These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
    minUnified       rune = 0x4E00
    maxUnified            = 0x9FFF
    minCompatibility      = 0xF900
    maxCompatibility      = 0xFAFF
    minRare               = 0x3400
    maxRare               = 0x4DBF
)

const (
    commonUnifiedOffset = 0x10000
    rareUnifiedOffset   = 0x20000 // largest rune in common is U+FAFF
    otherOffset         = 0x50000 // largest rune in rare is U+2FA1D
    illegalOffset       = otherOffset + int(unicode.MaxRune)
    maxPrimary          = illegalOffset + 1
)

// implicitPrimary returns the primary weight for a rune for which there is
// no entry in the collation table.
// We take a different approach from the one specified in
// https://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
    if unicode.Is(unicode.Ideographic, r) {
        if r >= minUnified && r <= maxUnified {
            // The most common case for CJK.
            return int(r) + commonUnifiedOffset
        }
        if r >= minCompatibility && r <= maxCompatibility {
            // This will typically not hit. The DUCET explicitly specifies mappings
            // for all characters that do not decompose.
            return int(r) + commonUnifiedOffset
        }
        return int(r) + rareUnifiedOffset
    }
    return int(r) + otherOffset
}
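The bit layouts documented above are easiest to verify with a round-trip through MakeElem and the weight accessors. The following is a minimal, test-style sketch, not part of the vendored file; the weight values are arbitrary, and it assumes it sits next to collelem.go in the same package:

package colltab

import "testing"

// TestMakeElemRoundTrip encodes weights with MakeElem and reads them back
// through the accessors, exercising the compact "type 1" layout (non-zero
// primary, default tertiary, zero CCC).
func TestMakeElemRoundTrip(t *testing.T) {
    ce, err := MakeElem(0x1234, defaultSecondary, defaultTertiary, 0)
    if err != nil {
        t.Fatal(err)
    }
    if got := ce.Primary(); got != 0x1234 {
        t.Errorf("Primary() = %x, want %x", got, 0x1234)
    }
    if got := ce.Secondary(); got != defaultSecondary {
        t.Errorf("Secondary() = %x, want %x", got, defaultSecondary)
    }
    if got := ce.Tertiary(); got != defaultTertiary {
        t.Errorf("Tertiary() = %x, want %x", got, defaultTertiary)
    }
}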
vendor/golang.org/x/text/internal/colltab/colltab.go (generated, vendored, new file, +105 lines)
@@ -0,0 +1,105 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"

import (
    "sort"

    "golang.org/x/text/language"
)

// MatchLang finds the index of t in tags, using a matching algorithm used for
// collation and search. tags[0] must be language.Und; the remaining tags should
// be sorted alphabetically.
//
// Language matching for collation and search is different from the matching
// defined by language.Matcher: the (inferred) base language must be an exact
// match for the relevant fields. For example, "gsw" should not match "de".
// Also the parent relation is different, as a parent may have a different
// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
// zh.
func MatchLang(t language.Tag, tags []language.Tag) int {
    // Canonicalize the values, including collapsing macro languages.
    t, _ = language.All.Canonicalize(t)

    base, conf := t.Base()
    // Estimate the base language, but only use high-confidence values.
    if conf < language.High {
        // The root locale supports "search" and "standard". We assume that any
        // implementation will only use one of the two.
        return 0
    }

    // Maximize base and script and normalize the tag.
    if _, s, r := t.Raw(); (r != language.Region{}) {
        p, _ := language.Raw.Compose(base, s, r)
        // Taking the parent forces the script to be maximized.
        p = p.Parent()
        // Add back region and extensions.
        t, _ = language.Raw.Compose(p, r, t.Extensions())
    } else {
        // Set the maximized base language.
        t, _ = language.Raw.Compose(base, s, t.Extensions())
    }

    // Find start index of the language tag.
    start := 1 + sort.Search(len(tags)-1, func(i int) bool {
        b, _, _ := tags[i+1].Raw()
        return base.String() <= b.String()
    })
    if start < len(tags) {
        if b, _, _ := tags[start].Raw(); b != base {
            return 0
        }
    }

    // Besides the base language, script and region, only the collation type and
    // the custom variant defined in the 'u' extension are used to distinguish a
    // locale.
    // Strip all variants and extensions and add back the custom variant.
    tdef, _ := language.Raw.Compose(t.Raw())
    tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))

    // First search for a specialized collation type, if present.
    try := []language.Tag{tdef}
    if co := t.TypeForKey("co"); co != "" {
        tco, _ := tdef.SetTypeForKey("co", co)
        try = []language.Tag{tco, tdef}
    }

    for _, tx := range try {
        for ; tx != language.Und; tx = parent(tx) {
            for i, t := range tags[start:] {
                if b, _, _ := t.Raw(); b != base {
                    break
                }
                if tx == t {
                    return start + i
                }
            }
        }
    }
    return 0
}

// parent computes the structural parent. This means inheritance may change
// script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
func parent(t language.Tag) language.Tag {
    if t.TypeForKey("va") != "" {
        t, _ = t.SetTypeForKey("va", "")
        return t
    }
    result := language.Und
    if b, s, r := t.Raw(); (r != language.Region{}) {
        result, _ = language.Raw.Compose(b, s, t.Extensions())
    } else if (s != language.Script{}) {
        result, _ = language.Raw.Compose(b, t.Extensions())
    } else if (b != language.Base{}) {
        result, _ = language.Raw.Compose(t.Extensions())
    }
    return result
}
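A usage sketch for MatchLang follows; it is not part of the vendored file and the tag list is hypothetical. Note the preconditions from the doc comment: tags[0] is language.Und and the remaining tags are sorted alphabetically.

package colltab

import (
    "fmt"

    "golang.org/x/text/language"
)

func matchLangSketch() {
    tags := []language.Tag{
        language.Und, // required first entry: the root locale
        language.MustParse("da"),
        language.MustParse("de"),
        language.MustParse("zh-Hant"),
    }
    // "de-AT" has base language "de", which must match exactly.
    fmt.Println(MatchLang(language.MustParse("de-AT"), tags))
    // An unsupported language falls back to index 0 (the root locale).
    fmt.Println(MatchLang(language.MustParse("fr"), tags))
}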
vendor/golang.org/x/text/internal/colltab/contract.go (generated, vendored, new file, +145 lines)
@@ -0,0 +1,145 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import "unicode/utf8"

// For a description of ContractTrieSet, see text/collate/build/contract.go.

type ContractTrieSet []struct{ L, H, N, I uint8 }

// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
    states ContractTrieSet
    s      []byte
    n      int
    index  int
    pindex int
    done   bool
}

type ctScannerString struct {
    states ContractTrieSet
    s      string
    n      int
    index  int
    pindex int
    done   bool
}

func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
    return ctScanner{s: b, states: t[index:], n: n}
}

func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
    return ctScannerString{s: str, states: t[index:], n: n}
}

// result returns the offset i and bytes consumed p so far. If no suffix
// matched, i and p will be 0.
func (s *ctScanner) result() (i, p int) {
    return s.index, s.pindex
}

func (s *ctScannerString) result() (i, p int) {
    return s.index, s.pindex
}

const (
    final   = 0
    noIndex = 0xFF
)

// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
func (s *ctScanner) scan(p int) int {
    pr := p // the p at the rune start
    str := s.s
    states, n := s.states, s.n
    for i := 0; i < n && p < len(str); {
        e := states[i]
        c := str[p]
        // TODO: a significant number of contractions are of a form that
        // cannot match discontiguous UTF-8 in a normalized string. We could let
        // a negative value of e.n mean that we can set s.done = true and avoid
        // the need for additional matches.
        if c >= e.L {
            if e.L == c {
                p++
                if e.I != noIndex {
                    s.index = int(e.I)
                    s.pindex = p
                }
                if e.N != final {
                    i, states, n = 0, states[int(e.H)+n:], int(e.N)
                    if p >= len(str) || utf8.RuneStart(str[p]) {
                        s.states, s.n, pr = states, n, p
                    }
                } else {
                    s.done = true
                    return p
                }
                continue
            } else if e.N == final && c <= e.H {
                p++
                s.done = true
                s.index = int(c-e.L) + int(e.I)
                s.pindex = p
                return p
            }
        }
        i++
    }
    return pr
}

// scan is a verbatim copy of ctScanner.scan.
func (s *ctScannerString) scan(p int) int {
    pr := p // the p at the rune start
    str := s.s
    states, n := s.states, s.n
    for i := 0; i < n && p < len(str); {
        e := states[i]
        c := str[p]
        // TODO: a significant number of contractions are of a form that
        // cannot match discontiguous UTF-8 in a normalized string. We could let
        // a negative value of e.n mean that we can set s.done = true and avoid
        // the need for additional matches.
        if c >= e.L {
            if e.L == c {
                p++
                if e.I != noIndex {
                    s.index = int(e.I)
                    s.pindex = p
                }
                if e.N != final {
                    i, states, n = 0, states[int(e.H)+n:], int(e.N)
                    if p >= len(str) || utf8.RuneStart(str[p]) {
                        s.states, s.n, pr = states, n, p
                    }
                } else {
                    s.done = true
                    return p
                }
                continue
            } else if e.N == final && c <= e.H {
                p++
                s.done = true
                s.index = int(c-e.L) + int(e.I)
                s.pindex = p
                return p
            }
        }
        i++
    }
    return pr
}
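The scanner mechanics are easiest to see on a hand-built trie. Below is an illustrative, test-style sketch, not part of the vendored code: it uses a hypothetical one-node trie that matches the single byte 'a' and maps it to element index 1, then scans an input to show how the matched index and consumed-byte count are reported.

package colltab

import "testing"

func TestContractScanSketch(t *testing.T) {
    // One final node covering the byte range ['a','a'], element index 1.
    trie := ContractTrieSet{{L: 'a', H: 'a', N: final, I: 1}}
    sc := trie.scanner(0, 1, []byte("abc"))
    consumed := sc.scan(0) // matches 'a', consuming one byte
    i, p := sc.result()
    if consumed != 1 || i != 1 || p != 1 {
        t.Errorf("got (%d, %d, %d), want (1, 1, 1)", consumed, i, p)
    }
    if !sc.done {
        t.Error("expected scanner to reach a final state")
    }
}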
vendor/golang.org/x/text/internal/colltab/iter.go (generated, vendored, new file, +178 lines)
@@ -0,0 +1,178 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

// An Iter incrementally converts chunks of the input text to collation
// elements, while ensuring that the collation elements are in normalized order
// (that is, they are in the order as if the input text were normalized first).
type Iter struct {
    Weighter Weighter
    Elems    []Elem
    // N is the number of elements in Elems that will not be reordered on
    // subsequent iterations, N <= len(Elems).
    N int

    bytes []byte
    str   string
    // Because the Elems buffer may contain collation elements that are needed
    // for look-ahead, we need two positions in the text (bytes or str): one for
    // the end position in the text for the current iteration and one for the
    // start of the next call to appendNext.
    pEnd  int // end position in text corresponding to N.
    pNext int // pEnd <= pNext.
}

// Reset sets the position in the current input text to p and discards any
// results obtained so far.
func (i *Iter) Reset(p int) {
    i.Elems = i.Elems[:0]
    i.N = 0
    i.pEnd = p
    i.pNext = p
}

// Len returns the length of the input text.
func (i *Iter) Len() int {
    if i.bytes != nil {
        return len(i.bytes)
    }
    return len(i.str)
}

// Discard removes the collation elements up to N.
func (i *Iter) Discard() {
    // TODO: change this such that only modifiers following starters will have
    // to be copied.
    i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])]
    i.N = 0
}

// End returns the end position of the input text for which Next has returned
// results.
func (i *Iter) End() int {
    return i.pEnd
}

// SetInput resets i to input s.
func (i *Iter) SetInput(s []byte) {
    i.bytes = s
    i.str = ""
    i.Reset(0)
}

// SetInputString resets i to input s.
func (i *Iter) SetInputString(s string) {
    i.str = s
    i.bytes = nil
    i.Reset(0)
}

func (i *Iter) done() bool {
    return i.pNext >= len(i.str) && i.pNext >= len(i.bytes)
}

func (i *Iter) appendNext() bool {
    if i.done() {
        return false
    }
    var sz int
    if i.bytes == nil {
        i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
    } else {
        i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
    }
    if sz == 0 {
        sz = 1
    }
    i.pNext += sz
    return true
}

// Next appends Elems to the internal array. On each iteration, it will either
// add starters or modifiers. In the majority of cases, an Elem with a primary
// value > 0 will have a CCC of 0. The CCC values of collation elements are also
// used to detect if the input string was not normalized and to adjust the
// result accordingly.
func (i *Iter) Next() bool {
    if i.N == len(i.Elems) && !i.appendNext() {
        return false
    }

    // Check if the current segment starts with a starter.
    prevCCC := i.Elems[len(i.Elems)-1].CCC()
    if prevCCC == 0 {
        i.N = len(i.Elems)
        i.pEnd = i.pNext
        return true
    } else if i.Elems[i.N].CCC() == 0 {
        // Set i.N to only cover the part of i.Elems for which prevCCC == 0 and
        // use the rest for the next call to Next.
        for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
        }
        i.pEnd = i.pNext
        return true
    }

    // The current (partial) segment starts with modifiers. We need to collect
    // all successive modifiers to ensure that they are normalized.
    for {
        p := len(i.Elems)
        i.pEnd = i.pNext
        if !i.appendNext() {
            break
        }

        if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
            // Leave the starter for the next iteration. This ensures that we
            // do not return sequences of collation elements that cross two
            // segments.
            //
            // TODO: handle large numbers of combining characters by fully
            // normalizing the input segment before iteration. This ensures
            // results are consistent across the text repo.
            i.N = p
            return true
        } else if ccc < prevCCC {
            i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
        } else {
            prevCCC = ccc
        }
    }

    done := len(i.Elems) != i.N
    i.N = len(i.Elems)
    return done
}

// nextNoNorm is the same as Next, but does not "normalize" the collation
// elements.
func (i *Iter) nextNoNorm() bool {
    // TODO: remove this function. Using this instead of Next does not seem
    // to improve performance in any significant way. We retain this until
    // later for evaluation purposes.
    if i.done() {
        return false
    }
    i.appendNext()
    i.N = len(i.Elems)
    return true
}

const maxCombiningCharacters = 30

// doNorm reorders the collation elements in i.Elems.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
func (i *Iter) doNorm(p int, ccc uint8) {
    n := len(i.Elems)
    k := p
    for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
    }
    i.Elems = append(i.Elems, i.Elems[p:k]...)
    copy(i.Elems[p:], i.Elems[k:])
    i.Elems = i.Elems[:n]
}
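Driving an Iter only requires something that satisfies the Weighter interface (see weighter.go below). The following is a minimal sketch, not part of the vendored code: byteWeighter is a hypothetical weighter that emits one element per input byte, which keeps every element a starter, so Next never needs to reorder.

package colltab

type byteWeighter struct{}

func (byteWeighter) Start(p int, b []byte) int       { return p }
func (byteWeighter) StartString(p int, s string) int { return p }
func (byteWeighter) Domain() []string                { return nil }
func (byteWeighter) Top() uint32                     { return 0 }

func (byteWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) {
    // Use the byte value as the primary weight; error ignored in this sketch.
    ce, _ := MakeElem(int(s[0]), defaultSecondary, defaultTertiary, 0)
    return append(buf, ce), 1
}

func (byteWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) {
    ce, _ := MakeElem(int(s[0]), defaultSecondary, defaultTertiary, 0)
    return append(buf, ce), 1
}

func iterSketch() {
    var it Iter
    it.Weighter = byteWeighter{}
    it.SetInputString("abc")
    for it.Next() {
        // it.Elems[:it.N] now holds elements in normalized order;
        // it.End() reports how far into the input they reach.
    }
}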
vendor/golang.org/x/text/internal/colltab/numeric.go (generated, vendored, new file, +236 lines)
@@ -0,0 +1,236 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import (
    "unicode"
    "unicode/utf8"
)

// NewNumericWeighter wraps w to replace individual digits to sort based on their
// numeric value.
//
// Weighter w must have a free primary weight after the primary weight for 9.
// If this is not the case, numeric value will sort at the same primary level
// as the first primary sorting after 9.
func NewNumericWeighter(w Weighter) Weighter {
    getElem := func(s string) Elem {
        elems, _ := w.AppendNextString(nil, s)
        return elems[0]
    }
    nine := getElem("9")

    // Numbers should order before zero, but the DUCET has no room for this.
    // TODO: move before zero once we use fractional collation elements.
    ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)

    return &numericWeighter{
        Weighter: w,

        // We assume that w sorts digits of different kinds in order of numeric
        // value and that the tertiary weight order is preserved.
        //
        // TODO: evaluate whether it is worth basing the ranges on the Elem
        // encoding itself once the move to fractional weights is complete.
        zero:          getElem("0"),
        zeroSpecialLo: getElem("０"), // U+FF10 FULLWIDTH DIGIT ZERO
        zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO
        nine:          nine,
        nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE
        numberStart:   ns,
    }
}

// A numericWeighter translates a stream of digits into a stream of weights
// representing the numeric value.
type numericWeighter struct {
    Weighter

    // The Elems below all demarcate boundaries of specific ranges. With the
    // current element encoding digits are in two ranges: normal (default
    // tertiary value) and special. For most languages, digits have collation
    // elements in the normal range.
    //
    // Note: the range tests are very specific for the element encoding used by
    // this implementation. The tests in collate_test.go are designed to fail
    // if this code is not updated when an encoding has changed.

    zero          Elem // normal digit zero
    zeroSpecialLo Elem // special digit zero, low tertiary value
    zeroSpecialHi Elem // special digit zero, high tertiary value
    nine          Elem // normal digit nine
    nineSpecialHi Elem // special digit nine
    numberStart   Elem
}

// AppendNext calls the namesake of the underlying weighter, but replaces single
// digits with weights representing their value.
func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
    ce, n = nw.Weighter.AppendNext(buf, s)
    nc := numberConverter{
        elems: buf,
        w:     nw,
        b:     s,
    }
    isZero, ok := nc.checkNextDigit(ce)
    if !ok {
        return ce, n
    }
    // ce might have been grown already, so take it instead of buf.
    nc.init(ce, len(buf), isZero)
    for n < len(s) {
        ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
        nc.b = s
        n += sz
        if !nc.update(ce) {
            break
        }
    }
    return nc.result(), n
}

// AppendNextString calls the namesake of the underlying weighter, but replaces
// single digits with weights representing their value.
func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
    ce, n = nw.Weighter.AppendNextString(buf, s)
    nc := numberConverter{
        elems: buf,
        w:     nw,
        s:     s,
    }
    isZero, ok := nc.checkNextDigit(ce)
    if !ok {
        return ce, n
    }
    nc.init(ce, len(buf), isZero)
    for n < len(s) {
        ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
        nc.s = s
        n += sz
        if !nc.update(ce) {
            break
        }
    }
    return nc.result(), n
}

type numberConverter struct {
    w *numericWeighter

    elems    []Elem
    nDigits  int
    lenIndex int

    s string // set if the input was of type string
    b []byte // set if the input was of type []byte
}

// init completes initialization of a numberConverter and prepares it for adding
// more digits. elems is assumed to have a digit starting at oldLen.
func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
    // Insert a marker indicating the start of a number and a placeholder
    // for the number of digits.
    if isZero {
        elems = append(elems[:oldLen], nc.w.numberStart, 0)
    } else {
        elems = append(elems, 0, 0)
        copy(elems[oldLen+2:], elems[oldLen:])
        elems[oldLen] = nc.w.numberStart
        elems[oldLen+1] = 0

        nc.nDigits = 1
    }
    nc.elems = elems
    nc.lenIndex = oldLen + 1
}

// checkNextDigit reports whether bufNew adds a single digit relative to the old
// buffer. If it does, it also reports whether this digit is zero.
func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
    if len(nc.elems) >= len(bufNew) {
        return false, false
    }
    e := bufNew[len(nc.elems)]
    if e < nc.w.zeroSpecialLo || nc.w.nine < e {
        // Not a number.
        return false, false
    }
    if e < nc.w.zero {
        if e > nc.w.nineSpecialHi {
            // Not a number.
            return false, false
        }
        if !nc.isDigit() {
            return false, false
        }
        isZero = e <= nc.w.zeroSpecialHi
    } else {
        // This is the common case if we encounter a digit.
        isZero = e == nc.w.zero
    }
    // Test that the remaining added collation elements have a zero primary value.
    if n := len(bufNew) - len(nc.elems); n > 1 {
        for i := len(nc.elems) + 1; i < len(bufNew); i++ {
            if bufNew[i].Primary() != 0 {
                return false, false
            }
        }
        // In some rare cases, collation elements will encode runes in
        // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
        // are not in Nd. Also some digits that clearly belong in unicode.No,
        // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
        // collation elements indistinguishable from normal digits.
        // Unfortunately, this means we need to make this check for nearly all
        // non-Latin digits.
        //
        // TODO: check the performance impact and find something better if it is
        // an issue.
        if !nc.isDigit() {
            return false, false
        }
    }
    return isZero, true
}

func (nc *numberConverter) isDigit() bool {
    if nc.b != nil {
        r, _ := utf8.DecodeRune(nc.b)
        return unicode.In(r, unicode.Nd)
    }
    r, _ := utf8.DecodeRuneInString(nc.s)
    return unicode.In(r, unicode.Nd)
}

// We currently support a maximum of about 2M digits (the number of primary
// values). Such numbers will compare correctly against small numbers, but their
// comparison against other large numbers is undefined.
//
// TODO: define a proper fallback, such as comparing large numbers textually or
// actually allowing numbers of unlimited length.
//
// TODO: cap this to a lower number (like 100) and maybe allow a larger number
// in an option?
const maxDigits = 1<<maxPrimaryBits - 1

func (nc *numberConverter) update(elems []Elem) bool {
    isZero, ok := nc.checkNextDigit(elems)
    if nc.nDigits == 0 && isZero {
        return true
    }
    nc.elems = elems
    if !ok {
        return false
    }
    nc.nDigits++
    return nc.nDigits < maxDigits
}

// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
    e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
    nc.elems[nc.lenIndex] = e
    return nc.elems
}
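In practice this weighter is reached through the public collate package, whose Numeric option is backed by NewNumericWeighter. A small usage sketch (a hypothetical standalone program; the comment shows the ordering expected with numeric collation):

package main

import (
    "fmt"

    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

func main() {
    c := collate.New(language.English, collate.Numeric)
    s := []string{"a12", "a2", "a100"}
    c.SortStrings(s)
    fmt.Println(s) // expected: [a2 a12 a100], not the lexicographic [a100 a12 a2]
}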
vendor/golang.org/x/text/internal/colltab/table.go (generated, vendored, new file, +275 lines)
@@ -0,0 +1,275 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import (
    "unicode/utf8"

    "golang.org/x/text/unicode/norm"
)

// Table holds all collation data for a given collation ordering.
type Table struct {
    Index Trie // main trie

    // expansion info
    ExpandElem []uint32

    // contraction info
    ContractTries  ContractTrieSet
    ContractElem   []uint32
    MaxContractLen int
    VariableTop    uint32
}

func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
    return t.appendNext(w, source{bytes: b})
}

func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
    return t.appendNext(w, source{str: s})
}

func (t *Table) Start(p int, b []byte) int {
    // TODO: implement
    panic("not implemented")
}

func (t *Table) StartString(p int, s string) int {
    // TODO: implement
    panic("not implemented")
}

func (t *Table) Domain() []string {
    // TODO: implement
    panic("not implemented")
}

func (t *Table) Top() uint32 {
    return t.VariableTop
}

type source struct {
    str   string
    bytes []byte
}

func (src *source) lookup(t *Table) (ce Elem, sz int) {
    if src.bytes == nil {
        return t.Index.lookupString(src.str)
    }
    return t.Index.lookup(src.bytes)
}

func (src *source) tail(sz int) {
    if src.bytes == nil {
        src.str = src.str[sz:]
    } else {
        src.bytes = src.bytes[sz:]
    }
}

func (src *source) nfd(buf []byte, end int) []byte {
    if src.bytes == nil {
        return norm.NFD.AppendString(buf[:0], src.str[:end])
    }
    return norm.NFD.Append(buf[:0], src.bytes[:end]...)
}

func (src *source) rune() (r rune, sz int) {
    if src.bytes == nil {
        return utf8.DecodeRuneInString(src.str)
    }
    return utf8.DecodeRune(src.bytes)
}

func (src *source) properties(f norm.Form) norm.Properties {
    if src.bytes == nil {
        return f.PropertiesString(src.str)
    }
    return f.Properties(src.bytes)
}

// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
    ce, sz := src.lookup(t)
    tp := ce.ctype()
    if tp == ceNormal {
        if ce == 0 {
            r, _ := src.rune()
            const (
                hangulSize  = 3
                firstHangul = 0xAC00
                lastHangul  = 0xD7A3
            )
            if r >= firstHangul && r <= lastHangul {
                // TODO: performance can be considerably improved here.
                n = sz
                var buf [16]byte // Used for decomposing Hangul.
                for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
                    ce, sz = t.Index.lookup(b)
                    w = append(w, ce)
                }
                return w, n
            }
            ce = makeImplicitCE(implicitPrimary(r))
        }
        w = append(w, ce)
    } else if tp == ceExpansionIndex {
        w = t.appendExpansion(w, ce)
    } else if tp == ceContractionIndex {
        n := 0
        src.tail(sz)
        if src.bytes == nil {
            w, n = t.matchContractionString(w, ce, src.str)
        } else {
            w, n = t.matchContraction(w, ce, src.bytes)
        }
        sz += n
    } else if tp == ceDecompose {
        // Decompose using NFKD and replace tertiary weights.
        t1, t2 := splitDecompose(ce)
        i := len(w)
        nfkd := src.properties(norm.NFKD).Decomposition()
        for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
            w, p = t.appendNext(w, source{bytes: nfkd})
        }
        w[i] = w[i].updateTertiary(t1)
        if i++; i < len(w) {
            w[i] = w[i].updateTertiary(t2)
            for i++; i < len(w); i++ {
                w[i] = w[i].updateTertiary(maxTertiary)
            }
        }
    }
    return w, sz
}

func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
    i := splitExpandIndex(ce)
    n := int(t.ExpandElem[i])
    i++
    for _, ce := range t.ExpandElem[i : i+n] {
        w = append(w, Elem(ce))
    }
    return w
}

func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
    index, n, offset := splitContractIndex(ce)

    scan := t.ContractTries.scanner(index, n, suffix)
    buf := [norm.MaxSegmentSize]byte{}
    bufp := 0
    p := scan.scan(0)

    if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
        // By now we should have filtered most cases.
        p0 := p
        bufn := 0
        rune := norm.NFD.Properties(suffix[p:])
        p += rune.Size()
        if rune.LeadCCC() != 0 {
            prevCC := rune.TrailCCC()
            // A gap may only occur in the last normalization segment.
            // This also ensures that len(scan.s) < norm.MaxSegmentSize.
            if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
                scan.s = suffix[:p+end]
            }
            for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
                rune = norm.NFD.Properties(suffix[p:])
                if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
                    break
                }
                prevCC = rune.TrailCCC()
                if pp := scan.scan(p); pp != p {
                    // Copy the interstitial runes for later processing.
                    bufn += copy(buf[bufn:], suffix[p0:p])
                    if scan.pindex == pp {
                        bufp = bufn
                    }
                    p, p0 = pp, pp
                } else {
                    p += rune.Size()
                }
            }
        }
    }
    // Append weights for the matched contraction, which may be an expansion.
    i, n := scan.result()
    ce = Elem(t.ContractElem[i+offset])
    if ce.ctype() == ceNormal {
        w = append(w, ce)
    } else {
        w = t.appendExpansion(w, ce)
    }
    // Append weights for the runes in the segment not part of the contraction.
    for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
        w, p = t.appendNext(w, source{bytes: b})
    }
    return w, n
}

// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
    index, n, offset := splitContractIndex(ce)

    scan := t.ContractTries.scannerString(index, n, suffix)
    buf := [norm.MaxSegmentSize]byte{}
    bufp := 0
    p := scan.scan(0)

    if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
        // By now we should have filtered most cases.
        p0 := p
        bufn := 0
        rune := norm.NFD.PropertiesString(suffix[p:])
        p += rune.Size()
        if rune.LeadCCC() != 0 {
            prevCC := rune.TrailCCC()
            // A gap may only occur in the last normalization segment.
            // This also ensures that len(scan.s) < norm.MaxSegmentSize.
            if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
                scan.s = suffix[:p+end]
            }
            for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
                rune = norm.NFD.PropertiesString(suffix[p:])
                if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
                    break
                }
                prevCC = rune.TrailCCC()
                if pp := scan.scan(p); pp != p {
                    // Copy the interstitial runes for later processing.
                    bufn += copy(buf[bufn:], suffix[p0:p])
                    if scan.pindex == pp {
                        bufp = bufn
                    }
                    p, p0 = pp, pp
                } else {
                    p += rune.Size()
                }
            }
        }
    }
    // Append weights for the matched contraction, which may be an expansion.
    i, n := scan.result()
    ce = Elem(t.ContractElem[i+offset])
    if ce.ctype() == ceNormal {
        w = append(w, ce)
    } else {
        w = t.appendExpansion(w, ce)
    }
    // Append weights for the runes in the segment not part of the contraction.
    for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
        w, p = t.appendNext(w, source{bytes: b})
    }
    return w, n
}
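The Hangul fast path in appendNext relies on a property of NFD that is easy to see directly: a precomposed Hangul syllable (three UTF-8 bytes) decomposes into two or three Jamo, which are then looked up individually. A small standalone sketch, not part of the vendored code:

package main

import (
    "fmt"

    "golang.org/x/text/unicode/norm"
)

func main() {
    // U+D55C HANGUL SYLLABLE HAN decomposes to U+1112 U+1161 U+11AB.
    for _, r := range norm.NFD.String("한") {
        fmt.Printf("U+%04X ", r)
    }
    fmt.Println()
}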
vendor/golang.org/x/text/internal/colltab/trie.go (generated, vendored, new file, +159 lines)
@@ -0,0 +1,159 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The trie in this file is used to associate the first full character in a
// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table of collation elements.
// For a full description, see go.text/collate/build/trie.go.

package colltab

const blockSize = 64

type Trie struct {
    Index0  []uint16 // index for first byte (0xC0-0xFF)
    Values0 []uint32 // index for first byte (0x00-0x7F)
    Index   []uint16
    Values  []uint32
}

const (
    t1 = 0x00 // 0000 0000
    tx = 0x80 // 1000 0000
    t2 = 0xC0 // 1100 0000
    t3 = 0xE0 // 1110 0000
    t4 = 0xF0 // 1111 0000
    t5 = 0xF8 // 1111 1000
    t6 = 0xFC // 1111 1100
    te = 0xFE // 1111 1110
)

func (t *Trie) lookupValue(n uint16, b byte) Elem {
    return Elem(t.Values[int(n)<<6+int(b)])
}

// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *Trie) lookup(s []byte) (v Elem, sz int) {
    c0 := s[0]
    switch {
    case c0 < tx:
        return Elem(t.Values0[c0]), 1
    case c0 < t2:
        return 0, 1
    case c0 < t3:
        if len(s) < 2 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        return t.lookupValue(i, c1), 2
    case c0 < t4:
        if len(s) < 3 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        o := int(i)<<6 + int(c1)
        i = t.Index[o]
        c2 := s[2]
        if c2 < tx || t2 <= c2 {
            return 0, 2
        }
        return t.lookupValue(i, c2), 3
    case c0 < t5:
        if len(s) < 4 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        o := int(i)<<6 + int(c1)
        i = t.Index[o]
        c2 := s[2]
        if c2 < tx || t2 <= c2 {
            return 0, 2
        }
        o = int(i)<<6 + int(c2)
        i = t.Index[o]
        c3 := s[3]
        if c3 < tx || t2 <= c3 {
            return 0, 3
        }
        return t.lookupValue(i, c3), 4
    }
    // Illegal rune
    return 0, 1
}

// The body of lookupString is a verbatim copy of that of lookup.
func (t *Trie) lookupString(s string) (v Elem, sz int) {
    c0 := s[0]
    switch {
    case c0 < tx:
        return Elem(t.Values0[c0]), 1
    case c0 < t2:
        return 0, 1
    case c0 < t3:
        if len(s) < 2 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        return t.lookupValue(i, c1), 2
    case c0 < t4:
        if len(s) < 3 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        o := int(i)<<6 + int(c1)
        i = t.Index[o]
        c2 := s[2]
        if c2 < tx || t2 <= c2 {
            return 0, 2
        }
        return t.lookupValue(i, c2), 3
    case c0 < t5:
        if len(s) < 4 {
            return 0, 0
        }
        i := t.Index0[c0]
        c1 := s[1]
        if c1 < tx || t2 <= c1 {
            return 0, 1
        }
        o := int(i)<<6 + int(c1)
        i = t.Index[o]
        c2 := s[2]
        if c2 < tx || t2 <= c2 {
            return 0, 2
        }
        o = int(i)<<6 + int(c2)
        i = t.Index[o]
        c3 := s[3]
        if c3 < tx || t2 <= c3 {
            return 0, 3
        }
        return t.lookupValue(i, c3), 4
    }
    // Illegal rune
    return 0, 1
}
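For single-byte (ASCII) input, lookup reduces to a direct index into Values0, which a tiny hand-built trie can demonstrate. A test-style sketch, not part of the vendored code; the stored value is arbitrary:

package colltab

import "testing"

func TestTrieASCIISketch(t *testing.T) {
    // Only the ASCII value table is needed for single-byte lookups.
    tr := Trie{Values0: make([]uint32, 128)}
    tr.Values0['a'] = 0x12345
    ce, sz := tr.lookup([]byte("a"))
    if ce != Elem(0x12345) || sz != 1 {
        t.Errorf("lookup = (%x, %d), want (%x, 1)", ce, sz, 0x12345)
    }
}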
vendor/golang.org/x/text/internal/colltab/weighter.go (generated, vendored, new file, +31 lines)
@@ -0,0 +1,31 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab // import "golang.org/x/text/internal/colltab"

// A Weighter can be used as a source for Collator and Searcher.
type Weighter interface {
    // Start finds the start of the segment that includes position p.
    Start(p int, b []byte) int

    // StartString finds the start of the segment that includes position p.
    StartString(p int, s string) int

    // AppendNext appends Elems to buf corresponding to the longest match
    // of a single character or contraction from the start of s.
    // It returns the new buf and the number of bytes consumed.
    AppendNext(buf []Elem, s []byte) (ce []Elem, n int)

    // AppendNextString appends Elems to buf corresponding to the longest match
    // of a single character or contraction from the start of s.
    // It returns the new buf and the number of bytes consumed.
    AppendNextString(buf []Elem, s string) (ce []Elem, n int)

    // Domain returns a slice of all single characters and contractions for which
    // collation elements are defined in this table.
    Domain() []string

    // Top returns the highest variable primary value.
    Top() uint32
}
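As a quick sanity check, the package's Table type (table.go above) implements this interface; a compile-time assertion makes that explicit. This line is illustrative and not part of the vendored code:

var _ Weighter = (*Table)(nil)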
vendor/golang.org/x/text/internal/gen/code.go (generated, vendored, new file, +375 lines)
@@ -0,0 +1,375 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gen

import (
    "bytes"
    "encoding/gob"
    "fmt"
    "hash"
    "hash/fnv"
    "io"
    "log"
    "os"
    "reflect"
    "strings"
    "unicode"
    "unicode/utf8"
)

// This file contains utilities for generating code.

// TODO: other write methods like:
// - slices, maps, types, etc.

// CodeWriter is a utility for writing structured code. It computes the content
// hash and size of written content. It ensures there are newlines between
// written code blocks.
type CodeWriter struct {
    buf  bytes.Buffer
    Size int
    Hash hash.Hash32 // content hash
    gob  *gob.Encoder
    // For comments we skip the usual one-line separator if they are followed by
    // a code block.
    skipSep bool
}

func (w *CodeWriter) Write(p []byte) (n int, err error) {
    return w.buf.Write(p)
}

// NewCodeWriter returns a new CodeWriter.
func NewCodeWriter() *CodeWriter {
    h := fnv.New32()
    return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)}
}

// WriteGoFile appends the buffer with the total size of all created structures
// and writes it as a Go file to the given file with the given package name.
func (w *CodeWriter) WriteGoFile(filename, pkg string) {
    f, err := os.Create(filename)
    if err != nil {
        log.Fatalf("Could not create file %s: %v", filename, err)
    }
    defer f.Close()
    if _, err = w.WriteGo(f, pkg, ""); err != nil {
        log.Fatalf("Error writing file %s: %v", filename, err)
    }
}

// WriteVersionedGoFile appends the buffer with the total size of all created
// structures and writes it as a Go file to the given file with the given
// package name and build tags for the current Unicode version.
func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) {
    tags := buildTags()
    if tags != "" {
        pattern := fileToPattern(filename)
        updateBuildTags(pattern)
        filename = fmt.Sprintf(pattern, UnicodeVersion())
    }
    f, err := os.Create(filename)
    if err != nil {
        log.Fatalf("Could not create file %s: %v", filename, err)
    }
    defer f.Close()
    if _, err = w.WriteGo(f, pkg, tags); err != nil {
        log.Fatalf("Error writing file %s: %v", filename, err)
    }
}

// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) {
    sz := w.Size
    if sz > 0 {
        w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
    }
    defer w.buf.Reset()
    return WriteGo(out, pkg, tags, w.buf.Bytes())
}

func (w *CodeWriter) printf(f string, x ...interface{}) {
    fmt.Fprintf(w, f, x...)
}

func (w *CodeWriter) insertSep() {
    if w.skipSep {
        w.skipSep = false
        return
    }
    // Use at least two newlines to ensure a blank space between the previous
    // block. WriteGoFile will remove extraneous newlines.
    w.printf("\n\n")
}

// WriteComment writes a comment block. All line starts are prefixed with "//".
// Initial empty lines are gobbled. The indentation for the first line is
// stripped from consecutive lines.
func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
    s := fmt.Sprintf(comment, args...)
    s = strings.Trim(s, "\n")

    // Use at least two newlines to ensure a blank space between the previous
    // block. WriteGoFile will remove extraneous newlines.
    w.printf("\n\n// ")
    w.skipSep = true

    // strip first indent level.
    sep := "\n"
    for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] {
        sep += s[:1]
    }

    strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s)

    w.printf("\n")
}

func (w *CodeWriter) writeSizeInfo(size int) {
    w.printf("// Size: %d bytes\n", size)
}

// WriteConst writes a constant of the given name and value.
func (w *CodeWriter) WriteConst(name string, x interface{}) {
    w.insertSep()
    v := reflect.ValueOf(x)

    switch v.Type().Kind() {
    case reflect.String:
        w.printf("const %s %s = ", name, typeName(x))
        w.WriteString(v.String())
        w.printf("\n")
    default:
        w.printf("const %s = %#v\n", name, x)
    }
}

// WriteVar writes a variable of the given name and value.
func (w *CodeWriter) WriteVar(name string, x interface{}) {
    w.insertSep()
    v := reflect.ValueOf(x)
    oldSize := w.Size
    sz := int(v.Type().Size())
    w.Size += sz

    switch v.Type().Kind() {
    case reflect.String:
        w.printf("var %s %s = ", name, typeName(x))
        w.WriteString(v.String())
    case reflect.Struct:
        w.gob.Encode(x)
        fallthrough
    case reflect.Slice, reflect.Array:
        w.printf("var %s = ", name)
        w.writeValue(v)
        w.writeSizeInfo(w.Size - oldSize)
    default:
        w.printf("var %s %s = ", name, typeName(x))
        w.gob.Encode(x)
        w.writeValue(v)
        w.writeSizeInfo(w.Size - oldSize)
    }
    w.printf("\n")
}

func (w *CodeWriter) writeValue(v reflect.Value) {
    x := v.Interface()
    switch v.Kind() {
    case reflect.String:
        w.WriteString(v.String())
    case reflect.Array:
        // Don't double count: callers of WriteArray count on the size being
        // added, so we need to discount it here.
        w.Size -= int(v.Type().Size())
        w.writeSlice(x, true)
    case reflect.Slice:
        w.writeSlice(x, false)
    case reflect.Struct:
        w.printf("%s{\n", typeName(v.Interface()))
        t := v.Type()
        for i := 0; i < v.NumField(); i++ {
            w.printf("%s: ", t.Field(i).Name)
            w.writeValue(v.Field(i))
            w.printf(",\n")
        }
        w.printf("}")
    default:
        w.printf("%#v", x)
    }
}

// WriteString writes a string literal.
func (w *CodeWriter) WriteString(s string) {
    io.WriteString(w.Hash, s) // content hash
    w.Size += len(s)

    const maxInline = 40
    if len(s) <= maxInline {
        w.printf("%q", s)
        return
    }

    // We will render the string as a multi-line string.
    const maxWidth = 80 - 4 - len(`"`) - len(`" +`)

    // When starting on its own line, go fmt indents line 2+ an extra level.
    n, max := maxWidth, maxWidth-4

    // As per https://golang.org/issue/18078, the compiler has trouble
    // compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN,
    // for large N. We insert redundant, explicit parentheses to work around
    // that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 +
    // ... + s127) + etc + (etc + ... + sN).
    explicitParens, extraComment := len(s) > 128*1024, ""
    if explicitParens {
|
||||
w.printf(`(`)
|
||||
extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078"
|
||||
}
|
||||
|
||||
// Print "" +\n, if a string does not start on its own line.
|
||||
b := w.buf.Bytes()
|
||||
if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
|
||||
w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment)
|
||||
n, max = maxWidth, maxWidth
|
||||
}
|
||||
|
||||
w.printf(`"`)
|
||||
|
||||
for sz, p, nLines := 0, 0, 0; p < len(s); {
|
||||
var r rune
|
||||
r, sz = utf8.DecodeRuneInString(s[p:])
|
||||
out := s[p : p+sz]
|
||||
chars := 1
|
||||
if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
|
||||
switch sz {
|
||||
case 1:
|
||||
out = fmt.Sprintf("\\x%02x", s[p])
|
||||
case 2, 3:
|
||||
out = fmt.Sprintf("\\u%04x", r)
|
||||
case 4:
|
||||
out = fmt.Sprintf("\\U%08x", r)
|
||||
}
|
||||
chars = len(out)
|
||||
} else if r == '\\' {
|
||||
out = "\\" + string(r)
|
||||
chars = 2
|
||||
}
|
||||
if n -= chars; n < 0 {
|
||||
nLines++
|
||||
if explicitParens && nLines&63 == 63 {
|
||||
w.printf("\") + (\"")
|
||||
}
|
||||
w.printf("\" +\n\"")
|
||||
n = max - len(out)
|
||||
}
|
||||
w.printf("%s", out)
|
||||
p += sz
|
||||
}
|
||||
w.printf(`"`)
|
||||
if explicitParens {
|
||||
w.printf(`)`)
|
||||
}
|
||||
}
|
||||
|
||||
// WriteSlice writes a slice value.
|
||||
func (w *CodeWriter) WriteSlice(x interface{}) {
|
||||
w.writeSlice(x, false)
|
||||
}
|
||||
|
||||
// WriteArray writes an array value.
|
||||
func (w *CodeWriter) WriteArray(x interface{}) {
|
||||
w.writeSlice(x, true)
|
||||
}
|
||||
|
||||
func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
|
||||
v := reflect.ValueOf(x)
|
||||
w.gob.Encode(v.Len())
|
||||
w.Size += v.Len() * int(v.Type().Elem().Size())
|
||||
name := typeName(x)
|
||||
if isArray {
|
||||
name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
|
||||
}
|
||||
if isArray {
|
||||
w.printf("%s{\n", name)
|
||||
} else {
|
||||
w.printf("%s{ // %d elements\n", name, v.Len())
|
||||
}
|
||||
|
||||
switch kind := v.Type().Elem().Kind(); kind {
|
||||
case reflect.String:
|
||||
for _, s := range x.([]string) {
|
||||
w.WriteString(s)
|
||||
w.printf(",\n")
|
||||
}
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
|
||||
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||
// nLine and nBlock are the number of elements per line and block.
|
||||
nLine, nBlock, format := 8, 64, "%d,"
|
||||
switch kind {
|
||||
case reflect.Uint8:
|
||||
format = "%#02x,"
|
||||
case reflect.Uint16:
|
||||
format = "%#04x,"
|
||||
case reflect.Uint32:
|
||||
nLine, nBlock, format = 4, 32, "%#08x,"
|
||||
case reflect.Uint, reflect.Uint64:
|
||||
nLine, nBlock, format = 4, 32, "%#016x,"
|
||||
case reflect.Int8:
|
||||
nLine = 16
|
||||
}
|
||||
n := nLine
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
if i%nBlock == 0 && v.Len() > nBlock {
|
||||
w.printf("// Entry %X - %X\n", i, i+nBlock-1)
|
||||
}
|
||||
x := v.Index(i).Interface()
|
||||
w.gob.Encode(x)
|
||||
w.printf(format, x)
|
||||
if n--; n == 0 {
|
||||
n = nLine
|
||||
w.printf("\n")
|
||||
}
|
||||
}
|
||||
w.printf("\n")
|
||||
case reflect.Struct:
|
||||
zero := reflect.Zero(v.Type().Elem()).Interface()
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
x := v.Index(i).Interface()
|
||||
w.gob.EncodeValue(v)
|
||||
if !reflect.DeepEqual(zero, x) {
|
||||
line := fmt.Sprintf("%#v,\n", x)
|
||||
line = line[strings.IndexByte(line, '{'):]
|
||||
w.printf("%d: ", i)
|
||||
w.printf(line)
|
||||
}
|
||||
}
|
||||
case reflect.Array:
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
w.printf("%d: %#v,\n", i, v.Index(i).Interface())
|
||||
}
|
||||
default:
|
||||
panic("gen: slice elem type not supported")
|
||||
}
|
||||
w.printf("}")
|
||||
}
|
||||
|
||||
// WriteType writes a definition of the type of the given value and returns the
|
||||
// type name.
|
||||
func (w *CodeWriter) WriteType(x interface{}) string {
|
||||
t := reflect.TypeOf(x)
|
||||
w.printf("type %s struct {\n", t.Name())
|
||||
for i := 0; i < t.NumField(); i++ {
|
||||
w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type)
|
||||
}
|
||||
w.printf("}\n")
|
||||
return t.Name()
|
||||
}
|
||||
|
||||
// typeName returns the name of the go type of x.
|
||||
func typeName(x interface{}) string {
|
||||
t := reflect.ValueOf(x).Type()
|
||||
return strings.Replace(fmt.Sprint(t), "main.", "", 1)
|
||||
}
|
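A minimal sketch of how a generator might drive the CodeWriter API above; the package name, table name, and values are invented for illustration:

package main

import "golang.org/x/text/internal/gen"

func main() {
	w := gen.NewCodeWriter()
	// WriteGoFile emits the accumulated buffer, prefixed with the total
	// table size and content checksum, gofmt-ed, into tables.go.
	defer w.WriteGoFile("tables.go", "mypkg")

	w.WriteConst("version", "11.0.0")          // emitted as a string constant
	w.WriteVar("lookup", []uint16{1, 2, 3, 4}) // emitted with a per-table size comment
}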
347
vendor/golang.org/x/text/internal/gen/gen.go
generated
vendored
Normal file
@ -0,0 +1,347 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gen contains common code for the various code generation tools in the
// text repository. Its usage ensures consistency between tools.
//
// This package defines command line flags that are common to most generation
// tools. The flags allow for specifying specific Unicode and CLDR versions
// in the public Unicode data repository (https://www.unicode.org/Public).
//
// A local Unicode data mirror can be set through the flag -local or the
// environment variable UNICODE_DIR. The former takes precedence. The local
// directory should follow the same structure as the public repository.
//
// IANA data can also optionally be mirrored by putting it in the iana directory
// rooted at the top of the local mirror. Beware, though, that IANA data is not
// versioned, so it is up to the developer to use the right version.
package gen // import "golang.org/x/text/internal/gen"

import (
	"bytes"
	"flag"
	"fmt"
	"go/build"
	"go/format"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"unicode"

	"golang.org/x/text/unicode/cldr"
)

var (
	url = flag.String("url",
		"https://www.unicode.org/Public",
		"URL of Unicode database directory")
	iana = flag.String("iana",
		"http://www.iana.org",
		"URL of the IANA repository")
	unicodeVersion = flag.String("unicode",
		getEnv("UNICODE_VERSION", unicode.Version),
		"unicode version to use")
	cldrVersion = flag.String("cldr",
		getEnv("CLDR_VERSION", cldr.Version),
		"cldr version to use")
)

func getEnv(name, def string) string {
	if v := os.Getenv(name); v != "" {
		return v
	}
	return def
}

// Init performs common initialization for a gen command. It parses the flags
// and sets up the standard logging parameters.
func Init() {
	log.SetPrefix("")
	log.SetFlags(log.Lshortfile)
	flag.Parse()
}

const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

`

// UnicodeVersion reports the requested Unicode version.
func UnicodeVersion() string {
	return *unicodeVersion
}

// CLDRVersion reports the requested CLDR version.
func CLDRVersion() string {
	return *cldrVersion
}

var tags = []struct{ version, buildTags string }{
	{"9.0.0", "!go1.10"},
	{"10.0.0", "go1.10,!go1.13"},
	{"11.0.0", "go1.13"},
}

// buildTags reports the build tags used for the current Unicode version.
func buildTags() string {
	v := UnicodeVersion()
	for _, e := range tags {
		if e.version == v {
			return e.buildTags
		}
	}
	log.Fatalf("Unknown build tags for Unicode version %q.", v)
	return ""
}

// IsLocal reports whether data files are available locally.
func IsLocal() bool {
	dir, err := localReadmeFile()
	if err != nil {
		return false
	}
	if _, err = os.Stat(dir); err != nil {
		return false
	}
	return true
}

// OpenUCDFile opens the requested UCD file. The file is specified relative to
// the public Unicode root directory. It will call log.Fatal if there are any
// errors.
func OpenUCDFile(file string) io.ReadCloser {
	return openUnicode(path.Join(*unicodeVersion, "ucd", file))
}

// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there
// are any errors.
func OpenCLDRCoreZip() io.ReadCloser {
	return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
}

// OpenUnicodeFile opens the requested file of the requested category from the
// root of the Unicode data archive. The file is specified relative to the
// public Unicode root directory. If version is "", it will use the default
// Unicode version. It will call log.Fatal if there are any errors.
func OpenUnicodeFile(category, version, file string) io.ReadCloser {
	if version == "" {
		version = UnicodeVersion()
	}
	return openUnicode(path.Join(category, version, file))
}

// OpenIANAFile opens the requested IANA file. The file is specified relative
// to the IANA root, which is typically either http://www.iana.org or the
// iana directory in the local mirror. It will call log.Fatal if there are any
// errors.
func OpenIANAFile(path string) io.ReadCloser {
	return Open(*iana, "iana", path)
}

var (
	dirMutex sync.Mutex
	localDir string
)

const permissions = 0755

func localReadmeFile() (string, error) {
	p, err := build.Import("golang.org/x/text", "", build.FindOnly)
	if err != nil {
		return "", fmt.Errorf("Could not locate package: %v", err)
	}
	return filepath.Join(p.Dir, "DATA", "README"), nil
}

func getLocalDir() string {
	dirMutex.Lock()
	defer dirMutex.Unlock()

	readme, err := localReadmeFile()
	if err != nil {
		log.Fatal(err)
	}
	dir := filepath.Dir(readme)
	if _, err := os.Stat(readme); err != nil {
		if err := os.MkdirAll(dir, permissions); err != nil {
			log.Fatalf("Could not create directory: %v", err)
		}
		ioutil.WriteFile(readme, []byte(readmeTxt), permissions)
	}
	return dir
}

const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT.

This directory contains downloaded files used to generate the various tables
in the golang.org/x/text subrepo.

Note that the language subtag repo (iana/assignments/language-subtag-registry)
and all other files in the iana subdirectory are not versioned and will need
to be periodically manually updated. The easiest way to do this is to remove
the entire iana directory. This is mostly of concern when updating the language
package.
`

// Open opens subdir/path if a local directory is specified and the file exists,
// where subdir is a directory relative to the local root, or fetches it from
// urlRoot/path otherwise. It will call log.Fatal if there are any errors.
func Open(urlRoot, subdir, path string) io.ReadCloser {
	file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path))
	return open(file, urlRoot, path)
}

func openUnicode(path string) io.ReadCloser {
	file := filepath.Join(getLocalDir(), filepath.FromSlash(path))
	return open(file, *url, path)
}

// TODO: automatically periodically update non-versioned files.

func open(file, urlRoot, path string) io.ReadCloser {
	if f, err := os.Open(file); err == nil {
		return f
	}
	r := get(urlRoot, path)
	defer r.Close()
	b, err := ioutil.ReadAll(r)
	if err != nil {
		log.Fatalf("Could not download file: %v", err)
	}
	os.MkdirAll(filepath.Dir(file), permissions)
	if err := ioutil.WriteFile(file, b, permissions); err != nil {
		log.Fatalf("Could not create file: %v", err)
	}
	return ioutil.NopCloser(bytes.NewReader(b))
}

func get(root, path string) io.ReadCloser {
	url := root + "/" + path
	fmt.Printf("Fetching %s...", url)
	defer fmt.Println(" done.")
	resp, err := http.Get(url)
	if err != nil {
		log.Fatalf("HTTP GET: %v", err)
	}
	if resp.StatusCode != 200 {
		log.Fatalf("Bad GET status for %q: %q", url, resp.Status)
	}
	return resp.Body
}

// TODO: use Write*Version in all applicable packages.

// WriteUnicodeVersion writes a constant for the Unicode version from which the
// tables are generated.
func WriteUnicodeVersion(w io.Writer) {
	fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
	fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion())
}

// WriteCLDRVersion writes a constant for the CLDR version from which the
// tables are generated.
func WriteCLDRVersion(w io.Writer) {
	fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
	fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion())
}

// WriteGoFile prepends a standard file comment and package statement to the
// given bytes, applies gofmt, and writes them to a file with the given name.
// It will call log.Fatal if there are any errors.
func WriteGoFile(filename, pkg string, b []byte) {
	w, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer w.Close()
	if _, err = WriteGo(w, pkg, "", b); err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}

func fileToPattern(filename string) string {
	suffix := ".go"
	if strings.HasSuffix(filename, "_test.go") {
		suffix = "_test.go"
	}
	prefix := filename[:len(filename)-len(suffix)]
	return fmt.Sprint(prefix, "%s", suffix)
}

func updateBuildTags(pattern string) {
	for _, t := range tags {
		oldFile := fmt.Sprintf(pattern, t.version)
		b, err := ioutil.ReadFile(oldFile)
		if err != nil {
			continue
		}
		build := fmt.Sprintf("// +build %s", t.buildTags)
		b = regexp.MustCompile(`// \+build .*`).ReplaceAll(b, []byte(build))
		err = ioutil.WriteFile(oldFile, b, 0644)
		if err != nil {
			log.Fatal(err)
		}
	}
}

// WriteVersionedGoFile prepends a standard file comment and a package statement
// to the given bytes, adds build tags to version the file for the current
// Unicode version, applies gofmt, and writes them to a file with the given
// name. It will call log.Fatal if there are any errors.
func WriteVersionedGoFile(filename, pkg string, b []byte) {
	pattern := fileToPattern(filename)
	updateBuildTags(pattern)
	filename = fmt.Sprintf(pattern, UnicodeVersion())

	w, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer w.Close()
	if _, err = WriteGo(w, pkg, buildTags(), b); err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}

// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) {
	src := []byte(header)
	if tags != "" {
		src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...)
	}
	src = append(src, fmt.Sprintf("package %s\n\n", pkg)...)
	src = append(src, b...)
	formatted, err := format.Source(src)
	if err != nil {
		// Print the generated code even in case of an error so that the
		// returned error can be meaningfully interpreted.
		n, _ = w.Write(src)
		return n, err
	}
	return w.Write(formatted)
}

// Repackage rewrites a Go file from belonging to package main to belonging to
// the given package.
func Repackage(inFile, outFile, pkg string) {
	src, err := ioutil.ReadFile(inFile)
	if err != nil {
		log.Fatalf("reading %s: %v", inFile, err)
	}
	const toDelete = "package main\n\n"
	i := bytes.Index(src, []byte(toDelete))
	if i < 0 {
		log.Fatalf("Could not find %q in %s.", toDelete, inFile)
	}
	w := &bytes.Buffer{}
	w.Write(src[i+len(toDelete):])
	WriteGoFile(outFile, pkg, w.Bytes())
}
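The versioned-file machinery above is easiest to follow with a concrete value. Assuming the -unicode flag resolves to 11.0.0, the steps inside WriteVersionedGoFile are roughly:

pattern := fileToPattern("tables.go") // "tables%s.go"
updateBuildTags(pattern)              // rewrites "// +build ..." lines in any existing
                                      // tables9.0.0.go, tables10.0.0.go, tables11.0.0.go
filename := fmt.Sprintf(pattern, UnicodeVersion()) // "tables11.0.0.go"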
16
vendor/golang.org/x/text/internal/language/common.go
generated
vendored
Normal file
@ -0,0 +1,16 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

package language

// This file contains code common to the maketables.go and the package code.

// AliasType is the type of an alias in AliasMap.
type AliasType int8

const (
	Deprecated AliasType = iota
	Macro
	Legacy

	AliasTypeUnknown AliasType = -1
)
29
vendor/golang.org/x/text/internal/language/compact.go
generated
vendored
Normal file
@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

// CompactCoreInfo is a compact integer with the three core tags encoded.
type CompactCoreInfo uint32

// GetCompactCore generates a uint32 value that is guaranteed to be unique for
// different language, region, and script values.
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
	if t.LangID > langNoIndexOffset {
		return 0, false
	}
	cci |= CompactCoreInfo(t.LangID) << (8 + 12)
	cci |= CompactCoreInfo(t.ScriptID) << 12
	cci |= CompactCoreInfo(t.RegionID)
	return cci, true
}

// Tag generates a tag from c.
func (c CompactCoreInfo) Tag() Tag {
	return Tag{
		LangID:   Language(c >> 20),
		RegionID: Region(c & 0x3ff),
		ScriptID: Script(c>>12) & 0xff,
	}
}
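A worked sketch of the bit layout: GetCompactCore packs the language index into the bits above 20, the script index at bit 12, and the region index in the low bits, and Tag reverses this with the shifts and masks shown above. The concrete IDs here are invented for illustration:

cci := CompactCoreInfo(5)<<20 | CompactCoreInfo(3)<<12 | CompactCoreInfo(7)
lang := Language(cci >> 20)      // 5
script := Script(cci>>12) & 0xff // 3
region := Region(cci & 0x3ff)    // 7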
61
vendor/golang.org/x/text/internal/language/compact/compact.go
generated
vendored
Normal file
@ -0,0 +1,61 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package compact defines a compact representation of language tags.
//
// Common language tags (at least all for which locale information is defined
// in CLDR) are assigned a unique index. Each Tag is associated with such an
// ID for selecting language-related resources (such as translations) as well
// as one for selecting regional defaults (currency, number formatting, etc.)
//
// This functionality may be exported at some point, but for now it is only
// available for use within x/text.
package compact // import "golang.org/x/text/internal/language/compact"

import (
	"sort"
	"strings"

	"golang.org/x/text/internal/language"
)

// ID is an integer identifying a single tag.
type ID uint16

func getCoreIndex(t language.Tag) (id ID, ok bool) {
	cci, ok := language.GetCompactCore(t)
	if !ok {
		return 0, false
	}
	i := sort.Search(len(coreTags), func(i int) bool {
		return cci <= coreTags[i]
	})
	if i == len(coreTags) || coreTags[i] != cci {
		return 0, false
	}
	return ID(i), true
}

// Parent returns the ID of the parent or the root ID if id is already the root.
func (id ID) Parent() ID {
	return parents[id]
}

// Tag converts id to an internal language Tag.
func (id ID) Tag() language.Tag {
	if int(id) >= len(coreTags) {
		return specialTags[int(id)-len(coreTags)]
	}
	return coreTags[id].Tag()
}

var specialTags []language.Tag

func init() {
	tags := strings.Split(specialTagsStr, " ")
	specialTags = make([]language.Tag, len(tags))
	for i, t := range tags {
		specialTags[i] = language.MustParse(t)
	}
}
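getCoreIndex relies on coreTags being sorted: sort.Search returns the first candidate not less than the key, and the equality check confirms an exact hit. The same pattern in isolation, with invented values:

sorted := []uint32{10, 20, 30}
key := uint32(20)
i := sort.Search(len(sorted), func(i int) bool { return key <= sorted[i] })
ok := i < len(sorted) && sorted[i] == key // ok == true, i == 1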
64
vendor/golang.org/x/text/internal/language/compact/gen.go
generated
vendored
Normal file
@ -0,0 +1,64 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// Language tag table generator.
// Data read from the web.

package main

import (
	"flag"
	"fmt"
	"log"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/unicode/cldr"
)

var (
	test = flag.Bool("test",
		false,
		"test existing tables; can be used to compare web data with package data.")
	outputFile = flag.String("output",
		"tables.go",
		"output file for generated tables")
)

func main() {
	gen.Init()

	w := gen.NewCodeWriter()
	defer w.WriteGoFile("tables.go", "compact")

	fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)

	b := newBuilder(w)
	gen.WriteCLDRVersion(w)

	b.writeCompactIndex()
}

type builder struct {
	w    *gen.CodeWriter
	data *cldr.CLDR
	supp *cldr.SupplementalData
}

func newBuilder(w *gen.CodeWriter) *builder {
	r := gen.OpenCLDRCoreZip()
	defer r.Close()
	d := &cldr.Decoder{}
	data, err := d.DecodeZip(r)
	if err != nil {
		log.Fatal(err)
	}
	b := builder{
		w:    w,
		data: data,
		supp: data.Supplemental(),
	}
	return &b
}
113
vendor/golang.org/x/text/internal/language/compact/gen_index.go
generated
vendored
Normal file
@ -0,0 +1,113 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main

// This file generates derivative tables based on the language package itself.

import (
	"fmt"
	"log"
	"sort"
	"strings"

	"golang.org/x/text/internal/language"
)

// Compact indices:
// Note -va-X variants only apply to localization variants.
// BCP variants only ever apply to language.
// The only ambiguity between tags is with regions.

func (b *builder) writeCompactIndex() {
	// Collect all language tags for which we have any data in CLDR.
	m := map[language.Tag]bool{}
	for _, lang := range b.data.Locales() {
		// We include all locales unconditionally to be consistent with en_US.
		// We want en_US, even though it has no data associated with it.

		// TODO: put any of the languages for which no data exists at the end
		// of the index. This allows all components based on ICU to use that
		// as the cutoff point.
		// if x := data.RawLDML(lang); false ||
		// 	x.LocaleDisplayNames != nil ||
		// 	x.Characters != nil ||
		// 	x.Delimiters != nil ||
		// 	x.Measurement != nil ||
		// 	x.Dates != nil ||
		// 	x.Numbers != nil ||
		// 	x.Units != nil ||
		// 	x.ListPatterns != nil ||
		// 	x.Collations != nil ||
		// 	x.Segmentations != nil ||
		// 	x.Rbnf != nil ||
		// 	x.Annotations != nil ||
		// 	x.Metadata != nil {

		// TODO: support POSIX natively, albeit non-standard.
		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
		m[tag] = true
		// }
	}

	// TODO: plural rules are also defined for the deprecated tags:
	//   iw mo sh tl
	// Consider removing these as compact tags.

	// Include locales for plural rules, which uses a different structure.
	for _, plurals := range b.supp.Plurals {
		for _, rules := range plurals.PluralRules {
			for _, lang := range strings.Split(rules.Locales, " ") {
				m[language.Make(lang)] = true
			}
		}
	}

	var coreTags []language.CompactCoreInfo
	var special []string

	for t := range m {
		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
			log.Fatalf("Unexpected extension %v in %v", x, t)
		}
		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
			cci, ok := language.GetCompactCore(t)
			if !ok {
				log.Fatalf("Locale for non-basic language %q", t)
			}
			coreTags = append(coreTags, cci)
		} else {
			special = append(special, t.String())
		}
	}

	w := b.w

	sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
	sort.Strings(special)

	w.WriteComment(`
	NumCompactTags is the number of common tags. The maximum tag is
	NumCompactTags-1.`)
	w.WriteConst("NumCompactTags", len(m))

	fmt.Fprintln(w, "const (")
	for i, t := range coreTags {
		fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
	}
	for i, t := range special {
		fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
	}
	fmt.Fprintln(w, ")")

	w.WriteVar("coreTags", coreTags)

	w.WriteConst("specialTagsStr", strings.Join(special, " "))
}

func ident(s string) string {
	return strings.Replace(s, "-", "", -1) + "Index"
}
54
vendor/golang.org/x/text/internal/language/compact/gen_parents.go
generated
vendored
Normal file
@ -0,0 +1,54 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main

import (
	"log"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/internal/language"
	"golang.org/x/text/internal/language/compact"
	"golang.org/x/text/unicode/cldr"
)

func main() {
	r := gen.OpenCLDRCoreZip()
	defer r.Close()

	d := &cldr.Decoder{}
	data, err := d.DecodeZip(r)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	defer w.WriteGoFile("parents.go", "compact")

	// Create parents table.
	type ID uint16
	parents := make([]ID, compact.NumCompactTags)
	for _, loc := range data.Locales() {
		tag := language.MustParse(loc)
		index, ok := compact.FromTag(tag)
		if !ok {
			continue
		}
		parentIndex := compact.ID(0) // und
		for p := tag.Parent(); p != language.Und; p = p.Parent() {
			if x, ok := compact.FromTag(p); ok {
				parentIndex = x
				break
			}
		}
		parents[index] = ID(parentIndex)
	}

	w.WriteComment(`
	parents maps a compact index of a tag to the compact index of the parent of
	this tag.`)
	w.WriteVar("parents", parents)
}
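What the generated table encodes, sketched as a parent walk (illustrative only; these are internal packages, and und at index 0 is its own parent, which terminates the loop):

if id, ok := compact.FromTag(language.MustParse("en-GB")); ok {
	for id.Parent() != id { // step to the nearest CLDR ancestor with a compact index
		id = id.Parent()
	}
	// id is now the root (und)
}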
260
vendor/golang.org/x/text/internal/language/compact/language.go
generated
vendored
Normal file
@ -0,0 +1,260 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go

package compact

// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).

import (
	"strings"

	"golang.org/x/text/internal/language"
)

// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
	// NOTE: exported tags will become part of the public API.
	language ID
	locale   ID
	full     fullTag // always a language.Tag for now.
}

const _und = 0

type fullTag interface {
	IsRoot() bool
	Parent() language.Tag
}

// Make a compact Tag from a fully specified internal language Tag.
func Make(t language.Tag) (tag Tag) {
	if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
		if r, err := language.ParseRegion(region[:2]); err == nil {
			tFull := t
			t, _ = t.SetTypeForKey("rg", "")
			// TODO: should we not consider "va" for the language tag?
			var exact1, exact2 bool
			tag.language, exact1 = FromTag(t)
			t.RegionID = r
			tag.locale, exact2 = FromTag(t)
			if !exact1 || !exact2 {
				tag.full = tFull
			}
			return tag
		}
	}
	lang, ok := FromTag(t)
	tag.language = lang
	tag.locale = lang
	if !ok {
		tag.full = t
	}
	return tag
}

// Tag returns an internal language Tag version of this tag.
func (t Tag) Tag() language.Tag {
	if t.full != nil {
		return t.full.(language.Tag)
	}
	tag := t.language.Tag()
	if t.language != t.locale {
		loc := t.locale.Tag()
		tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
	}
	return tag
}

// IsCompact reports whether this tag is fully defined in terms of ID.
func (t *Tag) IsCompact() bool {
	return t.full == nil
}

// MayHaveVariants reports whether a tag may have variants. If it returns false
// it is guaranteed the tag does not have variants.
func (t Tag) MayHaveVariants() bool {
	return t.full != nil || int(t.language) >= len(coreTags)
}

// MayHaveExtensions reports whether a tag may have extensions. If it returns
// false it is guaranteed the tag does not have them.
func (t Tag) MayHaveExtensions() bool {
	return t.full != nil ||
		int(t.language) >= len(coreTags) ||
		t.language != t.locale
}

// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
	if t.full != nil {
		return t.full.IsRoot()
	}
	return t.language == _und
}

// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
	if t.full != nil {
		return Make(t.full.Parent())
	}
	if t.language != t.locale {
		// Simulate stripping -u-rg-xxxxxx
		return Tag{language: t.language, locale: t.language}
	}
	// TODO: use parent lookup table once cycle from internal package is
	// removed. Probably by internalizing the table and declaring this fast
	// enough.
	// lang := compactID(internal.Parent(uint16(t.language)))
	lang, _ := FromTag(t.language.Tag().Parent())
	return Tag{language: lang, locale: lang}
}

// nextToken returns token t and the rest of the string.
func nextToken(s string) (t, tail string) {
	p := strings.Index(s[1:], "-")
	if p == -1 {
		return s[1:], ""
	}
	p++
	return s[1:p], s[p:]
}

// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
func LanguageID(t Tag) (id ID, exact bool) {
	return t.language, t.full == nil
}

// RegionalID returns the ID for the regional variant of this tag. This index is
// used to indicate region-specific overrides, such as default currency, default
// calendar and week data, default time cycle, and default measurement system
// and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
// settings for currency, number formatting, etc. The CompactIndex for this tag
// will be that for en-GB, while the RegionalID will be the one corresponding to
// en-US.
func RegionalID(t Tag) (id ID, exact bool) {
	return t.locale, t.full == nil
}

// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of the regional subtag "rg" and the
// variant subtag "va" in the "u" extension.
func (t Tag) LanguageTag() Tag {
	if t.full == nil {
		return Tag{language: t.language, locale: t.language}
	}
	tt := t.Tag()
	tt.SetTypeForKey("rg", "")
	tt.SetTypeForKey("va", "")
	return Make(tt)
}

// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
func (t Tag) RegionalTag() Tag {
	rt := Tag{language: t.locale, locale: t.locale}
	if t.full == nil {
		return rt
	}
	b := language.Builder{}
	tag := t.Tag()
	// tag, _ = tag.SetTypeForKey("rg", "")
	b.SetTag(t.locale.Tag())
	if v := tag.Variants(); v != "" {
		for _, v := range strings.Split(v, "-") {
			b.AddVariant(v)
		}
	}
	for _, e := range tag.Extensions() {
		b.AddExt(e)
	}
	return t
}

// FromTag reports the closest matching ID for an internal language Tag.
func FromTag(t language.Tag) (id ID, exact bool) {
	// TODO: perhaps give more frequent tags a lower index.
	// TODO: we could make the indexes stable. This would exclude some
	// possibilities for optimization, so don't do this quite yet.
	exact = true

	b, s, r := t.Raw()
	if t.HasString() {
		if t.IsPrivateUse() {
			// We have no entries for user-defined tags.
			return 0, false
		}
		hasExtra := false
		if t.HasVariants() {
			if t.HasExtensions() {
				build := language.Builder{}
				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
				build.AddVariant(t.Variants())
				exact = false
				t = build.Make()
			}
			hasExtra = true
		} else if _, ok := t.Extension('u'); ok {
			// TODO: va may mean something else. Consider not considering it.
			// Strip all but the 'va' entry.
			old := t
			variant := t.TypeForKey("va")
			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
			if variant != "" {
				t, _ = t.SetTypeForKey("va", variant)
				hasExtra = true
			}
			exact = old == t
		} else {
			exact = false
		}
		if hasExtra {
			// We have some variants.
			for i, s := range specialTags {
				if s == t {
					return ID(i + len(coreTags)), exact
				}
			}
			exact = false
		}
	}
	if x, ok := getCoreIndex(t); ok {
		return x, exact
	}
	exact = false
	if r != 0 && s == 0 {
		// Deal with cases where an extra script is inserted for the region.
		t, _ := t.Maximize()
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	for t = t.Parent(); t != root; t = t.Parent() {
		// No variants specified: just compare core components.
		// The key has the form lllssrrr, where l, s, and r are nibbles for
		// respectively the langID, scriptID, and regionID.
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	return 0, exact
}

var root = language.Tag{}
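A sketch of how the "rg" override splits one tag into the two indices documented above (illustrative only; these are internal packages):

full := language.MustParse("en-GB-u-rg-uszzzz")
t := compact.Make(full)

langID, _ := compact.LanguageID(t) // compact ID of en-GB: language-related resources
regID, _ := compact.RegionalID(t)  // compact ID of en-US: regional defaults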
120
vendor/golang.org/x/text/internal/language/compact/parents.go
generated
vendored
Normal file
@ -0,0 +1,120 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

package compact

// parents maps a compact index of a tag to the compact index of the parent of
// this tag.
var parents = []ID{ // 775 elements
	// Entry 0 - 3F
	0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
	0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
	0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
	0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
	0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
	// Entry 40 - 7F
	0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
	0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
	0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
	0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
	0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
	0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
	0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
	0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
	// Entry 80 - BF
	0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
	0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
	0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
	// Entry C0 - FF
	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
	0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
	0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
	// Entry 100 - 13F
	0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
	0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
	0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
	0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
	// Entry 140 - 17F
	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
	0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
	0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
	0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
	0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
	0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
	0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
	// Entry 180 - 1BF
	0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
	0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
	0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
	0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
	0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
	0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
	0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
	0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
	// Entry 1C0 - 1FF
	0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
	0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
	0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
	0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
	0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
	0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
	0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
	0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
	// Entry 200 - 23F
	0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
	0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
	0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
	0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
	0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
	0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
	0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
	// Entry 240 - 27F
	0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
	0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
	0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
	0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
	0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
	0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
	0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
	0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
	// Entry 280 - 2BF
	0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
	0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
	0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
	0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
	0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
	0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
	0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
	0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
	// Entry 2C0 - 2FF
	0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
	0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
	0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
	0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
	0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
	0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
	0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
	0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
	// Entry 300 - 33F
	0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
} // Size: 1574 bytes

// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
1015
vendor/golang.org/x/text/internal/language/compact/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
91
vendor/golang.org/x/text/internal/language/compact/tags.go
generated
vendored
Normal file
@ -0,0 +1,91 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package compact

var (
	und = Tag{}

	Und Tag = Tag{}

	Afrikaans            Tag = Tag{language: afIndex, locale: afIndex}
	Amharic              Tag = Tag{language: amIndex, locale: amIndex}
	Arabic               Tag = Tag{language: arIndex, locale: arIndex}
	ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
	Azerbaijani          Tag = Tag{language: azIndex, locale: azIndex}
	Bulgarian            Tag = Tag{language: bgIndex, locale: bgIndex}
	Bengali              Tag = Tag{language: bnIndex, locale: bnIndex}
	Catalan              Tag = Tag{language: caIndex, locale: caIndex}
	Czech                Tag = Tag{language: csIndex, locale: csIndex}
	Danish               Tag = Tag{language: daIndex, locale: daIndex}
	German               Tag = Tag{language: deIndex, locale: deIndex}
	Greek                Tag = Tag{language: elIndex, locale: elIndex}
	English              Tag = Tag{language: enIndex, locale: enIndex}
	AmericanEnglish      Tag = Tag{language: enUSIndex, locale: enUSIndex}
	BritishEnglish       Tag = Tag{language: enGBIndex, locale: enGBIndex}
	Spanish              Tag = Tag{language: esIndex, locale: esIndex}
	EuropeanSpanish      Tag = Tag{language: esESIndex, locale: esESIndex}
	LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
	Estonian             Tag = Tag{language: etIndex, locale: etIndex}
	Persian              Tag = Tag{language: faIndex, locale: faIndex}
	Finnish              Tag = Tag{language: fiIndex, locale: fiIndex}
	Filipino             Tag = Tag{language: filIndex, locale: filIndex}
	French               Tag = Tag{language: frIndex, locale: frIndex}
	CanadianFrench       Tag = Tag{language: frCAIndex, locale: frCAIndex}
	Gujarati             Tag = Tag{language: guIndex, locale: guIndex}
	Hebrew               Tag = Tag{language: heIndex, locale: heIndex}
	Hindi                Tag = Tag{language: hiIndex, locale: hiIndex}
	Croatian             Tag = Tag{language: hrIndex, locale: hrIndex}
	Hungarian            Tag = Tag{language: huIndex, locale: huIndex}
	Armenian             Tag = Tag{language: hyIndex, locale: hyIndex}
	Indonesian           Tag = Tag{language: idIndex, locale: idIndex}
	Icelandic            Tag = Tag{language: isIndex, locale: isIndex}
	Italian              Tag = Tag{language: itIndex, locale: itIndex}
	Japanese             Tag = Tag{language: jaIndex, locale: jaIndex}
	Georgian             Tag = Tag{language: kaIndex, locale: kaIndex}
	Kazakh               Tag = Tag{language: kkIndex, locale: kkIndex}
	Khmer                Tag = Tag{language: kmIndex, locale: kmIndex}
	Kannada              Tag = Tag{language: knIndex, locale: knIndex}
	Korean               Tag = Tag{language: koIndex, locale: koIndex}
	Kirghiz              Tag = Tag{language: kyIndex, locale: kyIndex}
	Lao                  Tag = Tag{language: loIndex, locale: loIndex}
	Lithuanian           Tag = Tag{language: ltIndex, locale: ltIndex}
	Latvian              Tag = Tag{language: lvIndex, locale: lvIndex}
	Macedonian           Tag = Tag{language: mkIndex, locale: mkIndex}
	Malayalam            Tag = Tag{language: mlIndex, locale: mlIndex}
	Mongolian            Tag = Tag{language: mnIndex, locale: mnIndex}
	Marathi              Tag = Tag{language: mrIndex, locale: mrIndex}
	Malay                Tag = Tag{language: msIndex, locale: msIndex}
	Burmese              Tag = Tag{language: myIndex, locale: myIndex}
	Nepali               Tag = Tag{language: neIndex, locale: neIndex}
	Dutch                Tag = Tag{language: nlIndex, locale: nlIndex}
	Norwegian            Tag = Tag{language: noIndex, locale: noIndex}
	Punjabi              Tag = Tag{language: paIndex, locale: paIndex}
	Polish               Tag = Tag{language: plIndex, locale: plIndex}
	Portuguese           Tag = Tag{language: ptIndex, locale: ptIndex}
	BrazilianPortuguese  Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
	EuropeanPortuguese   Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
	Romanian             Tag = Tag{language: roIndex, locale: roIndex}
	Russian              Tag = Tag{language: ruIndex, locale: ruIndex}
	Sinhala              Tag = Tag{language: siIndex, locale: siIndex}
	Slovak               Tag = Tag{language: skIndex, locale: skIndex}
	Slovenian            Tag = Tag{language: slIndex, locale: slIndex}
	Albanian             Tag = Tag{language: sqIndex, locale: sqIndex}
	Serbian              Tag = Tag{language: srIndex, locale: srIndex}
	SerbianLatin         Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
	Swedish              Tag = Tag{language: svIndex, locale: svIndex}
	Swahili              Tag = Tag{language: swIndex, locale: swIndex}
	Tamil                Tag = Tag{language: taIndex, locale: taIndex}
	Telugu               Tag = Tag{language: teIndex, locale: teIndex}
	Thai                 Tag = Tag{language: thIndex, locale: thIndex}
	Turkish              Tag = Tag{language: trIndex, locale: trIndex}
	Ukrainian            Tag = Tag{language: ukIndex, locale: ukIndex}
	Urdu                 Tag = Tag{language: urIndex, locale: urIndex}
	Uzbek                Tag = Tag{language: uzIndex, locale: uzIndex}
	Vietnamese           Tag = Tag{language: viIndex, locale: viIndex}
	Chinese              Tag = Tag{language: zhIndex, locale: zhIndex}
	SimplifiedChinese    Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
	TraditionalChinese   Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
	Zulu                 Tag = Tag{language: zuIndex, locale: zuIndex}
)
167
vendor/golang.org/x/text/internal/language/compose.go
generated
vendored
Normal file
@ -0,0 +1,167 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Builder allows constructing a Tag from individual components.
|
||||
// Its main user is Compose in the top-level language package.
|
||||
type Builder struct {
|
||||
Tag Tag
|
||||
|
||||
private string // the x extension
|
||||
variants []string
|
||||
extensions []string
|
||||
}
|
||||
|
||||
// Make returns a new Tag from the current settings.
|
||||
func (b *Builder) Make() Tag {
|
||||
t := b.Tag
|
||||
|
||||
if len(b.extensions) > 0 || len(b.variants) > 0 {
|
||||
sort.Sort(sortVariants(b.variants))
|
||||
sort.Strings(b.extensions)
|
||||
|
||||
if b.private != "" {
|
||||
b.extensions = append(b.extensions, b.private)
|
||||
}
|
||||
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
|
||||
buf := make([]byte, n)
|
||||
p := t.genCoreBytes(buf)
|
||||
t.pVariant = byte(p)
|
||||
p += appendTokens(buf[p:], b.variants...)
|
||||
t.pExt = uint16(p)
|
||||
p += appendTokens(buf[p:], b.extensions...)
|
||||
t.str = string(buf[:p])
|
||||
// We may not always need to remake the string, but when or when not
|
||||
// to do so is rather tricky.
|
||||
scan := makeScanner(buf[:p])
|
||||
t, _ = parse(&scan, "")
|
||||
return t
|
||||
|
||||
} else if b.private != "" {
|
||||
t.str = b.private
|
||||
t.RemakeString()
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// SetTag copies all the settings from a given Tag. Any previously set values
|
||||
// are discarded.
|
||||
func (b *Builder) SetTag(t Tag) {
|
||||
b.Tag.LangID = t.LangID
|
||||
b.Tag.RegionID = t.RegionID
|
||||
b.Tag.ScriptID = t.ScriptID
|
||||
// TODO: optimize
|
||||
b.variants = b.variants[:0]
|
||||
if variants := t.Variants(); variants != "" {
|
||||
for _, vr := range strings.Split(variants[1:], "-") {
|
||||
b.variants = append(b.variants, vr)
|
||||
}
|
||||
}
|
||||
b.extensions, b.private = b.extensions[:0], ""
|
||||
for _, e := range t.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
}
|
||||
|
||||
// AddExt adds extension e to the tag. e must be a valid extension as returned
|
||||
// by Tag.Extension. If the extension already exists, it will be discarded,
|
||||
// except for a -u extension, where non-existing key-type pairs will added.
|
||||
func (b *Builder) AddExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
if b.private == "" {
|
||||
b.private = e
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] += e[1:]
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// SetExt sets the extension e to the tag. e must be a valid extension as
|
||||
// returned by Tag.Extension. If the extension already exists, it will be
|
||||
// overwritten, except for a -u extension, where the individual key-type pairs
|
||||
// will be set.
|
||||
func (b *Builder) SetExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
b.private = e
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] = e + s[1:]
|
||||
} else {
|
||||
b.extensions[i] = e
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// AddVariant adds any number of variants.
|
||||
func (b *Builder) AddVariant(v ...string) {
|
||||
for _, v := range v {
|
||||
if v != "" {
|
||||
b.variants = append(b.variants, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ClearVariants removes any variants previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearVariants() {
|
||||
b.variants = b.variants[:0]
|
||||
}
|
||||
|
||||
// ClearExtensions removes any extensions previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearExtensions() {
|
||||
b.private = ""
|
||||
b.extensions = b.extensions[:0]
|
||||
}
|
||||
|
||||
func tokenLen(token ...string) (n int) {
|
||||
for _, t := range token {
|
||||
n += len(t) + 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func appendTokens(b []byte, token ...string) int {
|
||||
p := 0
|
||||
for _, t := range token {
|
||||
b[p] = '-'
|
||||
copy(b[p+1:], t)
|
||||
p += 1 + len(t)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
type sortVariants []string
|
||||
|
||||
func (s sortVariants) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortVariants) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sortVariants) Less(i, j int) bool {
|
||||
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||
}
|
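// Illustrative sketch (not part of the vendored file) of how the Builder
// above is driven; real callers go through Compose in the public language
// package, and the tag values here are assumptions for illustration.
//
//	var b Builder
//	b.SetTag(Make("sr-Latn"))  // copy base, script, and region
//	b.AddVariant("ekavsk")     // variants are sorted by Make
//	b.AddExt("u-co-phonebk")   // a -u extension; duplicates merge key-type pairs
//	t := b.Make()              // assembles "sr-Latn-ekavsk-u-co-phonebk"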
28
vendor/golang.org/x/text/internal/language/coverage.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
func BaseLanguages() []Language {
	base := make([]Language, 0, NumLanguages)
	for i := 0; i < langNoIndexOffset; i++ {
		// We included "und" already for the value 0.
		if i != nonCanonicalUnd {
			base = append(base, Language(i))
		}
	}
	i := langNoIndexOffset
	for _, v := range langNoIndex {
		for k := 0; k < 8; k++ {
			if v&1 == 1 {
				base = append(base, Language(i))
			}
			v >>= 1
			i++
		}
	}
	return base
}
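// Standalone sketch (not part of the vendored file) of the bitmap walk used
// by the second loop above: each byte of langNoIndex covers eight consecutive
// language IDs, and a set bit marks a supported language. The bitmap value
// here is made up.
package main

import "fmt"

func main() {
	// Bits 0, 2, and 9 are set, so IDs base+0, base+2, and base+9
	// would be reported as supported.
	bitmap := []uint8{0b00000101, 0b00000010}
	const base = 1000 // stand-in for langNoIndexOffset
	id := base
	for _, v := range bitmap {
		for k := 0; k < 8; k++ {
			if v&1 == 1 {
				fmt.Println("supported:", id)
			}
			v >>= 1
			id++
		}
	}
}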
1520
vendor/golang.org/x/text/internal/language/gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
20
vendor/golang.org/x/text/internal/language/gen_common.go
generated
vendored
Normal file
@ -0,0 +1,20 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main

// This file contains code common to the maketables.go and the package code.

// AliasType is the type of an alias in AliasMap.
type AliasType int8

const (
	Deprecated AliasType = iota
	Macro
	Legacy

	AliasTypeUnknown AliasType = -1
)
596
vendor/golang.org/x/text/internal/language/language.go
generated
vendored
Normal file
@ -0,0 +1,596 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_common.go -output tables.go

package language // import "golang.org/x/text/internal/language"

// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).

import (
	"errors"
	"fmt"
	"strings"
)

const (
	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
	maxCoreSize = 12

	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
	// is large enough to hold at least 99% of the BCP 47 tags.
	max99thPercentileSize = 32

	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
	maxSimpleUExtensionSize = 14
)

// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
	// TODO: the following fields have the form TagTypeID. This name is chosen
	// to allow refactoring the public package without conflicting with its
	// Base, Script, and Region methods. Once the transition is fully completed
	// the ID can be stripped from the name.

	LangID   Language
	RegionID Region
	// TODO: we will soon run out of positions for ScriptID. Idea: instead of
	// storing lang, region, and ScriptID codes, store only the compact index and
	// have a lookup table from this code to its expansion. This greatly speeds
	// up table lookup and the common variant cases.
	// This will also immediately free up 3 extra bytes. Also, the pVariant
	// field can now be moved to the lookup table, as the compact index uniquely
	// determines the offset of a possible variant.
	ScriptID Script
	pVariant byte   // offset in str, includes preceding '-'
	pExt     uint16 // offset of first extension, includes preceding '-'

	// str is the string representation of the Tag. It will only be used if the
	// tag has variants or extensions.
	str string
}

// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
	t, _ := Parse(s)
	return t
}

// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
// TODO: consider removing
func (t Tag) Raw() (b Language, s Script, r Region) {
	return t.LangID, t.ScriptID, t.RegionID
}

// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
	return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
}

// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
	if int(t.pVariant) < len(t.str) {
		return false
	}
	return t.equalTags(Und)
}

// IsPrivateUse reports whether the Tag consists solely of a private use tag.
func (t Tag) IsPrivateUse() bool {
	return t.str != "" && t.pVariant == 0
}

// RemakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
func (t *Tag) RemakeString() {
	if t.str == "" {
		return
	}
	extra := t.str[t.pVariant:]
	if t.pVariant > 0 {
		extra = extra[1:]
	}
	if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
		t.str = extra
		t.pVariant = 0
		t.pExt = 0
		return
	}
	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
	b := buf[:t.genCoreBytes(buf[:])]
	if extra != "" {
		diff := len(b) - int(t.pVariant)
		b = append(b, '-')
		b = append(b, extra...)
		t.pVariant = uint8(int(t.pVariant) + diff)
		t.pExt = uint16(int(t.pExt) + diff)
	} else {
		t.pVariant = uint8(len(b))
		t.pExt = uint16(len(b))
	}
	t.str = string(b)
}

// genCoreBytes writes a string for the base languages, script and region tags
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
	n := t.LangID.StringToBuf(buf[:])
	if t.ScriptID != 0 {
		n += copy(buf[n:], "-")
		n += copy(buf[n:], t.ScriptID.String())
	}
	if t.RegionID != 0 {
		n += copy(buf[n:], "-")
		n += copy(buf[n:], t.RegionID.String())
	}
	return n
}

// String returns the canonical string representation of the language tag.
func (t Tag) String() string {
	if t.str != "" {
		return t.str
	}
	if t.ScriptID == 0 && t.RegionID == 0 {
		return t.LangID.String()
	}
	buf := [maxCoreSize]byte{}
	return string(buf[:t.genCoreBytes(buf[:])])
}

// MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) {
	if t.str != "" {
		text = append(text, t.str...)
	} else if t.ScriptID == 0 && t.RegionID == 0 {
		text = append(text, t.LangID.String()...)
	} else {
		buf := [maxCoreSize]byte{}
		text = buf[:t.genCoreBytes(buf[:])]
	}
	return text, nil
}

// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
	tag, err := Parse(string(text))
	*t = tag
	return err
}

// Variants returns the part of the tag holding all variants or the empty string
// if there are no variants defined.
func (t Tag) Variants() string {
	if t.pVariant == 0 {
		return ""
	}
	return t.str[t.pVariant:t.pExt]
}
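// Worked illustration (not part of the vendored file) of how pVariant and
// pExt carve t.str into core, variant, and extension sections. The concrete
// offsets below are inferred from the field comments, not taken from tests:
//
//	str      = "en-US-posix-u-co-phonebk"
//	pVariant = 5   // points at the '-' before "posix"
//	pExt     = 11  // points at the '-' before "u-co-phonebk"
//
// so Variants() == str[5:11] == "-posix", and the extension part is
// str[11:] == "-u-co-phonebk", matching the "includes preceding '-'"
// convention above.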
// VariantOrPrivateUseTags returns variants or private use tags.
func (t Tag) VariantOrPrivateUseTags() string {
	if t.pExt > 0 {
		return t.str[t.pVariant:t.pExt]
	}
	return t.str[t.pVariant:]
}

// HasString reports whether this tag defines more than just the raw
// components.
func (t Tag) HasString() bool {
	return t.str != ""
}

// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
	if t.str != "" {
		// Strip the variants and extensions.
		b, s, r := t.Raw()
		t = Tag{LangID: b, ScriptID: s, RegionID: r}
		if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID == t.ScriptID {
				return Tag{LangID: t.LangID}
			}
		}
		return t
	}
	if t.LangID != 0 {
		if t.RegionID != 0 {
			maxScript := t.ScriptID
			if maxScript == 0 {
				max, _ := addTags(t)
				maxScript = max.ScriptID
			}

			for i := range parents {
				if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
					for _, r := range parents[i].fromRegion {
						if Region(r) == t.RegionID {
							return Tag{
								LangID:   t.LangID,
								ScriptID: Script(parents[i].script),
								RegionID: Region(parents[i].toRegion),
							}
						}
					}
				}
			}

			// Strip the script if it is the default one.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != maxScript {
				return Tag{LangID: t.LangID, ScriptID: maxScript}
			}
			return Tag{LangID: t.LangID}
		} else if t.ScriptID != 0 {
			// The parent for a base-script pair with a non-default script is
			// "und" instead of the base language.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != t.ScriptID {
				return Und
			}
			return Tag{LangID: t.LangID}
		}
	}
	return Und
}

// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
	scan := makeScannerString(s)
	var end int
	if n := len(scan.token); n != 1 {
		return "", ErrSyntax
	}
	scan.toLower(0, len(scan.b))
	end = parseExtension(&scan)
	if end != len(s) {
		return "", ErrSyntax
	}
	return string(scan.b), nil
}

// HasVariants reports whether t has variants.
func (t Tag) HasVariants() bool {
	return uint16(t.pVariant) < t.pExt
}

// HasExtensions reports whether t has extensions.
func (t Tag) HasExtensions() bool {
	return int(t.pExt) < len(t.str)
}

// Extension returns the extension of type x for tag t. It will return
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext string, ok bool) {
	for i := int(t.pExt); i < len(t.str)-1; {
		var ext string
		i, ext = getExtension(t.str, i)
		if ext[0] == x {
			return ext, true
		}
	}
	return "", false
}

// Extensions returns all extensions of t.
func (t Tag) Extensions() []string {
	e := []string{}
	for i := int(t.pExt); i < len(t.str)-1; {
		var ext string
		i, ext = getExtension(t.str, i)
		e = append(e, ext)
	}
	return e
}

// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string {
	if start, end, _ := t.findTypeForKey(key); end != start {
		return t.str[start:end]
	}
	return ""
}

var (
	errPrivateUse       = errors.New("cannot set a key on a private use tag")
	errInvalidArguments = errors.New("invalid key or type")
)

// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
	if t.IsPrivateUse() {
		return t, errPrivateUse
	}
	if len(key) != 2 {
		return t, errInvalidArguments
	}

	// Remove the setting if value is "".
	if value == "" {
		start, end, _ := t.findTypeForKey(key)
		if start != end {
			// Remove key tag and leading '-'.
			start -= 4

			// Remove a possible empty extension.
			if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
				start -= 2
			}
			if start == int(t.pVariant) && end == len(t.str) {
				t.str = ""
				t.pVariant, t.pExt = 0, 0
			} else {
				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
			}
		}
		return t, nil
	}

	if len(value) < 3 || len(value) > 8 {
		return t, errInvalidArguments
	}

	var (
		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
		uStart int // start of the -u extension.
	)

	// Generate the tag string if needed.
	if t.str == "" {
		uStart = t.genCoreBytes(buf[:])
		buf[uStart] = '-'
		uStart++
	}

	// Create new key-type pair and parse it to verify.
	b := buf[uStart:]
	copy(b, "u-")
	copy(b[2:], key)
	b[4] = '-'
	b = b[:5+copy(b[5:], value)]
	scan := makeScanner(b)
	if parseExtensions(&scan); scan.err != nil {
		return t, scan.err
	}

	// Assemble the replacement string.
	if t.str == "" {
		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
		t.str = string(buf[:uStart+len(b)])
	} else {
		s := t.str
		start, end, hasExt := t.findTypeForKey(key)
		if start == end {
			if hasExt {
				b = b[2:]
			}
			t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
		} else {
			t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
		}
	}
	return t, nil
}
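// Sketch (not part of the vendored file) of the key/type round trip
// implemented by the two methods above; values are illustrative, and
// SetTypeForKey validates the new pair by parsing it, as shown in the code:
//
//	t := Make("de")
//	t, _ = t.SetTypeForKey("co", "phonebk") // yields "de-u-co-phonebk"
//	_ = t.TypeForKey("co")                  // "phonebk"
//	t, _ = t.SetTypeForKey("co", "")        // empty value removes the pair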
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether a -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
	p := int(t.pExt)
	if len(key) != 2 || p == len(t.str) || p == 0 {
		return p, p, false
	}
	s := t.str

	// Find the correct extension.
	for p++; s[p] != 'u'; p++ {
		if s[p] > 'u' {
			p--
			return p, p, false
		}
		if p = nextExtension(s, p); p == len(s) {
			return len(s), len(s), false
		}
	}
	// Proceed to the hyphen following the extension name.
	p++

	// curKey is the key currently being processed.
	curKey := ""

	// Iterate over keys until we get the end of a section.
	for {
		// p points to the hyphen preceding the current token.
		if p3 := p + 3; s[p3] == '-' {
			// Found a key.
			// Check whether we just processed the key that was requested.
			if curKey == key {
				return start, p, true
			}
			// Set to the next key and continue scanning type tokens.
			curKey = s[p+1 : p3]
			if curKey > key {
				return p, p, true
			}
			// Start of the type token sequence.
			start = p + 4
			// A type is at least 3 characters long.
			p += 7 // 4 + 3
		} else {
			// Attribute or type, which is at least 3 characters long.
			p += 4
		}
		// p points past the third character of a type or attribute.
		max := p + 5 // maximum length of token plus hyphen.
		if len(s) < max {
			max = len(s)
		}
		for ; p < max && s[p] != '-'; p++ {
		}
		// Bail if we have exhausted all tokens or if the next token starts
		// a new extension.
		if p == len(s) || s[p+2] == '-' {
			if curKey == key {
				return start, p, true
			}
			return p, p, true
		}
	}
}

// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
	if n := len(s); n < 2 || 3 < n {
		return 0, ErrSyntax
	}
	var buf [3]byte
	return getLangID(buf[:copy(buf[:], s)])
}

// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
	if len(s) != 4 {
		return 0, ErrSyntax
	}
	var buf [4]byte
	return getScriptID(script, buf[:copy(buf[:], s)])
}

// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) {
	return getRegionM49(r)
}

// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
	if n := len(s); n < 2 || 3 < n {
		return 0, ErrSyntax
	}
	var buf [3]byte
	return getRegionID(buf[:copy(buf[:], s)])
}

// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool {
	if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
		return false
	}
	return true
}

// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool {
	if r == 0 {
		return false
	}
	return int(regionInclusion[r]) < len(regionContainment)
}

// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
	if r == c {
		return true
	}
	g := regionInclusion[r]
	if g >= nRegionGroups {
		return false
	}
	m := regionContainment[g]

	d := regionInclusion[c]
	b := regionInclusionBits[d]

	// A contained country may belong to multiple disjoint groups. Matching any
	// of these indicates containment. If the contained region is a group, it
	// must strictly be a subset.
	if d >= nRegionGroups {
		return b&m != 0
	}
	return b&^m == 0
}
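// Standalone toy (not part of the vendored file) of the two bitmask checks in
// Contains: membership for countries (b&m != 0) and subset for groups
// (b&^m == 0). The masks are invented, not real regionContainment data.
package main

import "fmt"

func main() {
	m := uint32(0b1110)          // bits covered by the containing group
	country := uint32(0b0100)    // a country: one inclusion bit inside m
	subGroup := uint32(0b0110)   // a group fully inside m
	otherGroup := uint32(0b0011) // a group with a bit outside m

	fmt.Println(country&m != 0)     // true: any overlapping bit suffices
	fmt.Println(subGroup&^m == 0)   // true: groups must be strict subsets
	fmt.Println(otherGroup&^m == 0) // false: bit 0 escapes m
}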
var errNoTLD = errors.New("language: region is not a valid ccTLD")

// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized if it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
	// difference between ISO 3166-1 and IANA ccTLD.
	if r == _GB {
		r = _UK
	}
	if (r.typ() & ccTLD) == 0 {
		return 0, errNoTLD
	}
	return r, nil
}

// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
	if cr := normRegion(r); cr != 0 {
		return cr
	}
	return r
}

// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	ID  uint8
	str string
}

// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
	s = strings.ToLower(s)
	if id, ok := variantIndex[s]; ok {
		return Variant{id, s}, nil
	}
	return Variant{}, NewValueError([]byte(s))
}

// String returns the string representation of the variant.
func (v Variant) String() string {
	return v.str
}
412
vendor/golang.org/x/text/internal/language/lookup.go
generated
vendored
Normal file
@ -0,0 +1,412 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

import (
	"bytes"
	"fmt"
	"sort"
	"strconv"

	"golang.org/x/text/internal/tag"
)

// findIndex tries to find the given tag in idx and returns a standardized error
// if it could not be found.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
	if !tag.FixCase(form, key) {
		return 0, ErrSyntax
	}
	i := idx.Index(key)
	if i == -1 {
		return 0, NewValueError(key)
	}
	return i, nil
}

func searchUint(imap []uint16, key uint16) int {
	return sort.Search(len(imap), func(i int) bool {
		return imap[i] >= key
	})
}

type Language uint16

// getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag.
func getLangID(s []byte) (Language, error) {
	if len(s) == 2 {
		return getLangISO2(s)
	}
	return getLangISO3(s)
}

// TODO: language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.

func (id Language) Canonicalize() (Language, AliasType) {
	return normLang(id)
}

// normLang returns the mapped langID of id according to the alias mapping.
func normLang(id Language) (Language, AliasType) {
	k := sort.Search(len(AliasMap), func(i int) bool {
		return AliasMap[i].From >= uint16(id)
	})
	if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
		return Language(AliasMap[k].To), AliasTypes[k]
	}
	return id, AliasTypeUnknown
}

// getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO2(s []byte) (Language, error) {
	if !tag.FixCase("zz", s) {
		return 0, ErrSyntax
	}
	if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
		return Language(i), nil
	}
	return 0, NewValueError(s)
}

const base = 'z' - 'a' + 1

func strToInt(s []byte) uint {
	v := uint(0)
	for i := 0; i < len(s); i++ {
		v *= base
		v += uint(s[i] - 'a')
	}
	return v
}

// intToStr converts the given integer to the original ASCII string passed to
// strToInt. len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	for i := len(s) - 1; i >= 0; i-- {
		s[i] = byte(v%base) + 'a'
		v /= base
	}
}
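// Standalone check (not part of the vendored file) of the base-26 encoding
// above: "und" maps to 20*26*26 + 13*26 + 3 = 13861 and back again.
package main

import "fmt"

func main() {
	const base = 'z' - 'a' + 1 // 26
	v := uint(0)
	for _, c := range []byte("und") {
		v = v*base + uint(c-'a')
	}
	fmt.Println(v) // 13861

	s := make([]byte, 3)
	for i := 2; i >= 0; i-- { // the inverse, mirroring intToStr
		s[i] = byte(v%base) + 'a'
		v /= base
	}
	fmt.Println(string(s)) // "und"
}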
// getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO3(s []byte) (Language, error) {
	if tag.FixCase("und", s) {
		// first try to match canonical 3-letter entries
		for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
			if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
				// We treat "und" as special and always translate it to "unspecified".
				// Note that ZZ and Zzzz are private use and are not treated as
				// unspecified by default.
				id := Language(i)
				if id == nonCanonicalUnd {
					return 0, nil
				}
				return id, nil
			}
		}
		if i := altLangISO3.Index(s); i != -1 {
			return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
		}
		n := strToInt(s)
		if langNoIndex[n/8]&(1<<(n%8)) != 0 {
			return Language(n) + langNoIndexOffset, nil
		}
		// Check for non-canonical uses of ISO3.
		for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
			if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
				return Language(i), nil
			}
		}
		return 0, NewValueError(s)
	}
	return 0, ErrSyntax
}

// StringToBuf writes the string to b and returns the number of bytes
// written. cap(b) must be >= 3.
func (id Language) StringToBuf(b []byte) int {
	if id >= langNoIndexOffset {
		intToStr(uint(id)-langNoIndexOffset, b[:3])
		return 3
	} else if id == 0 {
		return copy(b, "und")
	}
	l := lang[id<<2:]
	if l[3] == 0 {
		return copy(b, l[:3])
	}
	return copy(b, l[:2])
}

// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b Language) String() string {
	if b == 0 {
		return "und"
	} else if b >= langNoIndexOffset {
		b -= langNoIndexOffset
		buf := [3]byte{}
		intToStr(uint(b), buf[:])
		return string(buf[:])
	}
	l := lang.Elem(int(b))
	if l[3] == 0 {
		return l[:3]
	}
	return l[:2]
}

// ISO3 returns the ISO 639-3 language code.
func (b Language) ISO3() string {
	if b == 0 || b >= langNoIndexOffset {
		return b.String()
	}
	l := lang.Elem(int(b))
	if l[3] == 0 {
		return l[:3]
	} else if l[2] == 0 {
		return altLangISO3.Elem(int(l[3]))[:3]
	}
	// This allocation will only happen for 3-letter ISO codes
	// that are non-canonical BCP 47 language identifiers.
	return l[0:1] + l[2:4]
}

// IsPrivateUse reports whether this language code is reserved for private use.
func (b Language) IsPrivateUse() bool {
	return langPrivateStart <= b && b <= langPrivateEnd
}

// SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script.
func (b Language) SuppressScript() Script {
	if b < langNoIndexOffset {
		return Script(suppressScript[b])
	}
	return 0
}

type Region uint16

// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) (Region, error) {
	if len(s) == 3 {
		if isAlpha(s[0]) {
			return getRegionISO3(s)
		}
		if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
			return getRegionM49(int(i))
		}
	}
	return getRegionISO2(s)
}

// getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO2(s []byte) (Region, error) {
	i, err := findIndex(regionISO, s, "ZZ")
	if err != nil {
		return 0, err
	}
	return Region(i) + isoRegionOffset, nil
}

// getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO3(s []byte) (Region, error) {
	if tag.FixCase("ZZZ", s) {
		for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
			if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
				return Region(i) + isoRegionOffset, nil
			}
		}
		for i := 0; i < len(altRegionISO3); i += 3 {
			if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
				return Region(altRegionIDs[i/3]), nil
			}
		}
		return 0, NewValueError(s)
	}
	return 0, ErrSyntax
}

func getRegionM49(n int) (Region, error) {
	if 0 < n && n <= 999 {
		const (
			searchBits = 7
			regionBits = 9
			regionMask = 1<<regionBits - 1
		)
		idx := n >> searchBits
		buf := fromM49[m49Index[idx]:m49Index[idx+1]]
		val := uint16(n) << regionBits // we rely on bits shifting out
		i := sort.Search(len(buf), func(i int) bool {
			return buf[i] >= val
		})
		if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
			return Region(r & regionMask), nil
		}
	}
	var e ValueError
	fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
	return 0, e
}
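// Toy pack/unpack (not part of the vendored file) of the fromM49 layout used
// above: the M.49 code sits in the high bits and the region ID in the low
// regionBits bits; `uint16(n) << regionBits` deliberately drops the code's
// high bits, which the bucketed m49Index lookup has already fixed. The region
// ID 57 here is invented for illustration.
package main

import "fmt"

func main() {
	const (
		regionBits = 9
		regionMask = 1<<regionBits - 1
	)
	n, region := uint16(276), uint16(57) // M.49 code 276 with a made-up ID
	packed := n<<regionBits | region     // high bits of n shift out, as noted above

	fmt.Println(packed & regionMask)                 // 57: recover the region ID
	fmt.Println(packed&^regionMask == n<<regionBits) // true: code part matches
}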
// normRegion returns a region if r is deprecated or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r Region) Region {
	m := regionOldMap
	k := sort.Search(len(m), func(i int) bool {
		return m[i].From >= uint16(r)
	})
	if k < len(m) && m[k].From == uint16(r) {
		return Region(m[k].To)
	}
	return 0
}

const (
	iso3166UserAssigned = 1 << iota
	ccTLD
	bcp47Region
)

func (r Region) typ() byte {
	return regionTypes[r]
}

// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
func (r Region) String() string {
	if r < isoRegionOffset {
		if r == 0 {
			return "ZZ"
		}
		return fmt.Sprintf("%03d", r.M49())
	}
	r -= isoRegionOffset
	return regionISO.Elem(int(r))[:2]
}

// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
	if r < isoRegionOffset {
		return "ZZZ"
	}
	r -= isoRegionOffset
	reg := regionISO.Elem(int(r))
	switch reg[2] {
	case 0:
		return altRegionISO3[reg[3]:][:3]
	case ' ':
		return "ZZZ"
	}
	return reg[0:1] + reg[2:4]
}

// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
func (r Region) M49() int {
	return int(m49[r])
}

// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r Region) IsPrivateUse() bool {
	return r.typ()&iso3166UserAssigned != 0
}

type Script uint8

// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
func getScriptID(idx tag.Index, s []byte) (Script, error) {
	i, err := findIndex(idx, s, "Zzzz")
	return Script(i), err
}

// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s Script) String() string {
	if s == 0 {
		return "Zzzz"
	}
	return script.Elem(int(s))
}

// IsPrivateUse reports whether this script code is reserved for private use.
func (s Script) IsPrivateUse() bool {
	return _Qaaa <= s && s <= _Qabx
}

const (
	maxAltTaglen = len("en-US-POSIX")
	maxLen       = maxAltTaglen
)

var (
	// grandfatheredMap holds a mapping from legacy and grandfathered tags to
	// their base language or an index to a more elaborate tag.
	grandfatheredMap = map[[maxLen]byte]int16{
		[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
		[maxLen]byte{'i', '-', 'a', 'm', 'i'}:                          _ami, // i-ami
		[maxLen]byte{'i', '-', 'b', 'n', 'n'}:                          _bnn, // i-bnn
		[maxLen]byte{'i', '-', 'h', 'a', 'k'}:                          _hak, // i-hak
		[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}:      _tlh, // i-klingon
		[maxLen]byte{'i', '-', 'l', 'u', 'x'}:                          _lb,  // i-lux
		[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}:           _nv,  // i-navajo
		[maxLen]byte{'i', '-', 'p', 'w', 'n'}:                          _pwn, // i-pwn
		[maxLen]byte{'i', '-', 't', 'a', 'o'}:                          _tao, // i-tao
		[maxLen]byte{'i', '-', 't', 'a', 'y'}:                          _tay, // i-tay
		[maxLen]byte{'i', '-', 't', 's', 'u'}:                          _tsu, // i-tsu
		[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}:                     _nb,  // no-bok
		[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}:                     _nn,  // no-nyn
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}:      _sfb, // sgn-BE-FR
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}:      _vgt, // sgn-BE-NL
		[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}:      _sgg, // sgn-CH-DE
		[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}:           _cmn, // zh-guoyu
		[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}:           _hak, // zh-hakka
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
		[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}:           _hsn, // zh-xiang

		// Grandfathered tags with no modern replacement will be converted as
		// follows:
		[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
		[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}:           -2, // en-GB-oed
		[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}:           -3, // i-default
		[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}:      -4, // i-enochian
		[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}:                     -5, // i-mingo
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}:                          -6, // zh-min

		// CLDR-specific tag.
		[maxLen]byte{'r', 'o', 'o', 't'}:                                    0,  // root
		[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX
	}

	altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}

	altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)

func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
	if v, ok := grandfatheredMap[s]; ok {
		if v < 0 {
			return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
		}
		t.LangID = Language(v)
		return t, true
	}
	return t, false
}
226
vendor/golang.org/x/text/internal/language/match.go
generated
vendored
Normal file
@ -0,0 +1,226 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

import "errors"

type scriptRegionFlags uint8

const (
	isList = 1 << iota
	scriptInFrom
	regionInFrom
)

func (t *Tag) setUndefinedLang(id Language) {
	if t.LangID == 0 {
		t.LangID = id
	}
}

func (t *Tag) setUndefinedScript(id Script) {
	if t.ScriptID == 0 {
		t.ScriptID = id
	}
}

func (t *Tag) setUndefinedRegion(id Region) {
	if t.RegionID == 0 || t.RegionID.Contains(id) {
		t.RegionID = id
	}
}

// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")

// addLikelySubtags sets subtags to their most likely value, given the locale.
// In most cases this means setting fields for unknown values, but in some
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
// if the given locale cannot be expanded.
func (t Tag) addLikelySubtags() (Tag, error) {
	id, err := addTags(t)
	if err != nil {
		return t, err
	} else if id.equalTags(t) {
		return t, nil
	}
	id.RemakeString()
	return id, nil
}

// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
		x := likelyRegionGroup[i]
		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
			t.RegionID = Region(x.region)
		}
		return true
	}
	return false
}

// Maximize returns a new tag with missing tags filled in.
func (t Tag) Maximize() (Tag, error) {
	return addTags(t)
}
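// Indicative use of Maximize (not part of the vendored file); the concrete
// result depends on the CLDR likely-subtags tables, so the output here is an
// assumption rather than a guarantee:
//
//	t, err := Make("zh").Maximize()
//	// with typical CLDR data this fills in script and region: "zh-Hans-CN"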
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.IsPrivateUse() {
		return t, nil
	}
	if t.ScriptID != 0 && t.RegionID != 0 {
		if t.LangID != 0 {
			// already fully specified
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		list := likelyRegion[t.RegionID : t.RegionID+1]
		if x := list[0]; x.flags&isList != 0 {
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if Script(x.script) == t.ScriptID {
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
	}
	if t.LangID != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.LangID < langNoIndexOffset {
			x := likelyLang[t.LangID]
			if x.flags&isList != 0 {
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.ScriptID != 0 {
					for _, x := range list {
						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
							t.setUndefinedRegion(Region(x.region))
							return t, nil
						}
					}
				} else if t.RegionID != 0 {
					count := 0
					goodScript := true
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
							tt.RegionID = Region(x.region)
							tt.setUndefinedScript(Script(x.script))
							goodScript = goodScript && tt.ScriptID == Script(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.ScriptID = tt.ScriptID
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.ScriptID != 0 {
			x := likelyScript[t.ScriptID]
			if x.region != 0 {
				t.setUndefinedRegion(Region(x.region))
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.RegionID != 0 {
			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					t.RegionID = Region(x.region)
				}
			} else {
				x := likelyRegion[t.RegionID]
				if x.flags&isList != 0 {
					x = likelyRegionList[x.lang]
				}
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					return t, nil
				}
			}
		}
	}

	// Search matches for lang.
	if t.LangID < langNoIndexOffset {
		x := likelyLang[t.LangID]
		if x.flags&isList != 0 {
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(Script(x.script))
			t.setUndefinedRegion(Region(x.region))
		}
		specializeRegion(&t)
		if t.LangID == 0 {
			t.LangID = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}

func (t *Tag) setTagsFrom(id Tag) {
	t.LangID = id.LangID
	t.ScriptID = id.ScriptID
	t.RegionID = id.RegionID
}

// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
func (t Tag) minimize() (Tag, error) {
	t, err := minimizeTags(t)
	if err != nil {
		return t, err
	}
	t.RemakeString()
	return t, nil
}

// minimizeTags mimics the behavior of the ICU 51 C implementation.
func minimizeTags(t Tag) (Tag, error) {
	if t.equalTags(Und) {
		return t, nil
	}
	max, err := addTags(t)
	if err != nil {
		return t, err
	}
	for _, id := range [...]Tag{
		{LangID: t.LangID},
		{LangID: t.LangID, RegionID: t.RegionID},
		{LangID: t.LangID, ScriptID: t.ScriptID},
	} {
		if x, err := addTags(id); err == nil && max.equalTags(x) {
			t.setTagsFrom(id)
			break
		}
	}
	return t, nil
}
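// Indicative behavior of the minimization above (not part of the vendored
// file, and again CLDR-data dependent): minimizeTags keeps the smallest of
// {lang}, {lang, region}, {lang, script} that still maximizes to the same tag.
//
//	t, _ := Make("en-Latn-US").minimize()
//	// t.String() == "en" under typical likely-subtags data, because
//	// {LangID: _en} alone already maximizes back to en-Latn-US.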
594
vendor/golang.org/x/text/internal/language/parse.go
generated
vendored
Normal file
@ -0,0 +1,594 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

import (
	"bytes"
	"errors"
	"fmt"
	"sort"

	"golang.org/x/text/internal/tag"
)

// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit.
func isAlpha(b byte) bool {
	return b > '9'
}

// isAlphaNum returns true if the string contains only ASCII letters or digits.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
			return false
		}
	}
	return true
}

// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")

// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")

// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	v [8]byte
}

// NewValueError creates a new ValueError.
func NewValueError(tag []byte) ValueError {
	var e ValueError
	copy(e.v[:], tag)
	return e
}

func (e ValueError) tag() []byte {
	n := bytes.IndexByte(e.v[:], 0)
	if n == -1 {
		n = 8
	}
	return e.v[:n]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	return string(e.tag())
}

// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
	b     []byte
	bytes [max99thPercentileSize]byte
	token []byte
	start int // start position of the current token
	end   int // end position of the current token
	next  int // next point for scan
	err   error
	done  bool
}

func makeScannerString(s string) scanner {
	scan := scanner{}
	if len(s) <= len(scan.bytes) {
		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
	} else {
		scan.b = []byte(s)
	}
	scan.init()
	return scan
}

// makeScanner returns a scanner using b as the input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
	scan := scanner{b: b}
	scan.init()
	return scan
}

func (s *scanner) init() {
	for i, c := range s.b {
		if c == '_' {
			s.b[i] = '-'
		}
	}
	s.scan()
}

// toLower converts the string between start and end to lower case.
func (s *scanner) toLower(start, end int) {
	for i := start; i < end; i++ {
		c := s.b[i]
		if 'A' <= c && c <= 'Z' {
			s.b[i] += 'a' - 'A'
		}
	}
}

func (s *scanner) setError(e error) {
	if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
		s.err = e
	}
}

// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
	s.start = oldStart
	if end := oldStart + newSize; end != oldEnd {
		diff := end - oldEnd
		if end < cap(s.b) {
			b := make([]byte, len(s.b)+diff)
			copy(b, s.b[:oldStart])
			copy(b[end:], s.b[oldEnd:])
			s.b = b
		} else {
			s.b = append(s.b[end:], s.b[oldEnd:]...)
		}
		s.next = end + (s.next - s.end)
		s.end = end
	}
}

// replace replaces the current token with repl.
func (s *scanner) replace(repl string) {
	s.resizeRange(s.start, s.end, len(repl))
	copy(s.b[s.start:], repl)
}

// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
	s.setError(e)
	if s.start == 0 {
		s.b = s.b[:+copy(s.b, s.b[s.next:])]
		s.end = 0
	} else {
		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
		s.end = s.start - 1
	}
	s.next = s.start
}

// deleteRange removes the given range from s.b before the current token.
func (s *scanner) deleteRange(start, end int) {
	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
	diff := end - start
	s.next -= diff
	s.start -= diff
	s.end -= diff
}

// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
func (s *scanner) scan() (end int) {
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			s.gobble(ErrSyntax)
			continue
		}
		s.token = token
		return end
	}
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(ErrSyntax)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}

// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
	end = s.end
	s.scan()
	for ; len(s.token) >= min; s.scan() {
		end = s.end
	}
	return end
}

// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
func Parse(s string) (t Tag, err error) {
	// TODO: consider supporting old-style locale key-value pairs.
	if s == "" {
		return Und, ErrSyntax
	}
	if len(s) <= maxAltTaglen {
		b := [maxAltTaglen]byte{}
		for i, c := range s {
			// Generating invalid UTF-8 is okay as it won't match.
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			} else if c == '_' {
				c = '-'
			}
			b[i] = byte(c)
		}
		if t, ok := grandfathered(b); ok {
			return t, nil
		}
	}
	scan := makeScannerString(s)
	return parse(&scan, s)
}
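// Examples (not part of the vendored file) of the behavior documented above;
// outputs are indicative:
//
//	t, _ := Parse("en_US")    // '_' is normalized to '-': yields "en-US"
//	t, _ = Parse("i-klingon") // grandfathered: remapped via grandfatheredMap to base "tlh"
//	_, err := Parse("a")      // single-letter primary subtag other than x: ErrSyntax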
|
||||
|
||||
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||
t = Und
|
||||
var end int
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
if n == 0 || scan.token[0] != 'x' {
|
||||
return t, ErrSyntax
|
||||
}
|
||||
end = parseExtensions(scan)
|
||||
} else if n >= 4 {
|
||||
return Und, ErrSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
} else if end < len(scan.b) {
|
||||
scan.setError(ErrSyntax)
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
}
|
||||
if int(t.pVariant) < len(scan.b) {
|
||||
if end < len(s) {
|
||||
s = s[:end]
|
||||
}
|
||||
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||
t.str = s
|
||||
} else {
|
||||
t.str = string(scan.b)
|
||||
}
|
||||
} else {
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
}
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) {
    var e error
    // TODO: set an error if an unknown lang, script or region is encountered.
    t.LangID, e = getLangID(scan.token)
    scan.setError(e)
    scan.replace(t.LangID.String())
    langStart := scan.start
    end = scan.scan()
    for len(scan.token) == 3 && isAlpha(scan.token[0]) {
        // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
        // to a tag of the form <extlang>.
        lang, e := getLangID(scan.token)
        if lang != 0 {
            t.LangID = lang
            copy(scan.b[langStart:], lang.String())
            scan.b[langStart+3] = '-'
            scan.start = langStart + 4
        }
        scan.gobble(e)
        end = scan.scan()
    }
    if len(scan.token) == 4 && isAlpha(scan.token[0]) {
        t.ScriptID, e = getScriptID(script, scan.token)
        if t.ScriptID == 0 {
            scan.gobble(e)
        }
        end = scan.scan()
    }
    if n := len(scan.token); n >= 2 && n <= 3 {
        t.RegionID, e = getRegionID(scan.token)
        if t.RegionID == 0 {
            scan.gobble(e)
        } else {
            scan.replace(t.RegionID.String())
        }
        end = scan.scan()
    }
    scan.toLower(scan.start, len(scan.b))
    t.pVariant = byte(end)
    end = parseVariants(scan, end, t)
    t.pExt = uint16(end)
    return t, end
}

var separator = []byte{'-'}

// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
func parseVariants(scan *scanner, end int, t Tag) int {
    start := scan.start
    varIDBuf := [4]uint8{}
    variantBuf := [4][]byte{}
    varID := varIDBuf[:0]
    variant := variantBuf[:0]
    last := -1
    needSort := false
    for ; len(scan.token) >= 4; scan.scan() {
        // TODO: measure the impact of needing this conversion and redesign
        // the data structure if there is an issue.
        v, ok := variantIndex[string(scan.token)]
        if !ok {
            // unknown variant
            // TODO: allow user-defined variants?
            scan.gobble(NewValueError(scan.token))
            continue
        }
        varID = append(varID, v)
        variant = append(variant, scan.token)
        if !needSort {
            if last < int(v) {
                last = int(v)
            } else {
                needSort = true
                // There are no legal combinations of more than 7 variants
                // (and this is by no means a useful sequence).
                const maxVariants = 8
                if len(varID) > maxVariants {
                    break
                }
            }
        }
        end = scan.end
    }
    if needSort {
        sort.Sort(variantsSort{varID, variant})
        k, l := 0, -1
        for i, v := range varID {
            w := int(v)
            if l == w {
                // Remove duplicates.
                continue
            }
            varID[k] = varID[i]
            variant[k] = variant[i]
            k++
            l = w
        }
        if str := bytes.Join(variant[:k], separator); len(str) == 0 {
            end = start - 1
        } else {
            scan.resizeRange(start, end, len(str))
            copy(scan.b[scan.start:], str)
            end = scan.end
        }
    }
    return end
}

type variantsSort struct {
    i []uint8
    v [][]byte
}

func (s variantsSort) Len() int {
    return len(s.i)
}

func (s variantsSort) Swap(i, j int) {
    s.i[i], s.i[j] = s.i[j], s.i[i]
    s.v[i], s.v[j] = s.v[j], s.v[i]
}

func (s variantsSort) Less(i, j int) bool {
    return s.i[i] < s.i[j]
}

type bytesSort struct {
    b [][]byte
    n int // first n bytes to compare
}

func (b bytesSort) Len() int {
    return len(b.b)
}

func (b bytesSort) Swap(i, j int) {
    b.b[i], b.b[j] = b.b[j], b.b[i]
}

func (b bytesSort) Less(i, j int) bool {
    for k := 0; k < b.n; k++ {
        if b.b[i][k] == b.b[j][k] {
            continue
        }
        return b.b[i][k] < b.b[j][k]
    }
    return false
}

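// Illustrative sketch (not part of the vendored file): bytesSort orders byte
// slices by their first n bytes only. parseExtensions uses it with n = 1 to
// order extension blocks by their singleton byte, and parseExtension uses
// n = 2 and n = 3 for keys and attributes. Assumes "fmt" is imported here.
func exampleBytesSort() {
    exts := [][]byte{[]byte("u-co-phonebk"), []byte("a-bcde")}
    sort.Sort(bytesSort{exts, 1})
    fmt.Printf("%s\n", bytes.Join(exts, separator)) // a-bcde-u-co-phonebk
}
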
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
func parseExtensions(scan *scanner) int {
    start := scan.start
    exts := [][]byte{}
    private := []byte{}
    end := scan.end
    for len(scan.token) == 1 {
        extStart := scan.start
        ext := scan.token[0]
        end = parseExtension(scan)
        extension := scan.b[extStart:end]
        if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
            scan.setError(ErrSyntax)
            end = extStart
            continue
        } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
            scan.b = scan.b[:end]
            return end
        } else if ext == 'x' {
            private = extension
            break
        }
        exts = append(exts, extension)
    }
    sort.Sort(bytesSort{exts, 1})
    if len(private) > 0 {
        exts = append(exts, private)
    }
    scan.b = scan.b[:start]
    if len(exts) > 0 {
        scan.b = append(scan.b, bytes.Join(exts, separator)...)
    } else if start > 0 {
        // Strip trailing '-'.
        scan.b = scan.b[:start-1]
    }
    return end
}

// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension(scan *scanner) int {
    start, end := scan.start, scan.end
    switch scan.token[0] {
    case 'u':
        attrStart := end
        scan.scan()
        for last := []byte{}; len(scan.token) > 2; scan.scan() {
            if bytes.Compare(scan.token, last) != -1 {
                // Attributes are unsorted. Start over from scratch.
                p := attrStart + 1
                scan.next = p
                attrs := [][]byte{}
                for scan.scan(); len(scan.token) > 2; scan.scan() {
                    attrs = append(attrs, scan.token)
                    end = scan.end
                }
                sort.Sort(bytesSort{attrs, 3})
                copy(scan.b[p:], bytes.Join(attrs, separator))
                break
            }
            last = scan.token
            end = scan.end
        }
        var last, key []byte
        for attrEnd := end; len(scan.token) == 2; last = key {
            key = scan.token
            keyEnd := scan.end
            end = scan.acceptMinSize(3)
            // TODO: check key value validity
            if keyEnd == end || bytes.Compare(key, last) != 1 {
                // We have an invalid key or the keys are not sorted.
                // Start scanning keys from scratch and reorder.
                p := attrEnd + 1
                scan.next = p
                keys := [][]byte{}
                for scan.scan(); len(scan.token) == 2; {
                    keyStart, keyEnd := scan.start, scan.end
                    end = scan.acceptMinSize(3)
                    if keyEnd != end {
                        keys = append(keys, scan.b[keyStart:end])
                    } else {
                        scan.setError(ErrSyntax)
                        end = keyStart
                    }
                }
                sort.Stable(bytesSort{keys, 2})
                if n := len(keys); n > 0 {
                    k := 0
                    for i := 1; i < n; i++ {
                        if !bytes.Equal(keys[k][:2], keys[i][:2]) {
                            k++
                            keys[k] = keys[i]
                        } else if !bytes.Equal(keys[k], keys[i]) {
                            scan.setError(ErrDuplicateKey)
                        }
                    }
                    keys = keys[:k+1]
                }
                reordered := bytes.Join(keys, separator)
                if e := p + len(reordered); e < end {
                    scan.deleteRange(e, end)
                    end = e
                }
                copy(scan.b[p:], reordered)
                break
            }
        }
    case 't':
        scan.scan()
        if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
            _, end = parseTag(scan)
            scan.toLower(start, end)
        }
        for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
            end = scan.acceptMinSize(3)
        }
    case 'x':
        end = scan.acceptMinSize(1)
    default:
        end = scan.acceptMinSize(2)
    }
    return end
}

// getExtension returns the name, body and end position of the extension.
func getExtension(s string, p int) (end int, ext string) {
    if s[p] == '-' {
        p++
    }
    if s[p] == 'x' {
        return len(s), s[p:]
    }
    end = nextExtension(s, p)
    return end, s[p:end]
}

// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
    for n := len(s) - 3; p < n; {
        if s[p] == '-' {
            if s[p+2] == '-' {
                return p
            }
            p += 3
        } else {
            p++
        }
    }
    return len(s)
}

3431
vendor/golang.org/x/text/internal/language/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
48
vendor/golang.org/x/text/internal/language/tags.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func MustParse(s string) Tag {
    t, err := Parse(s)
    if err != nil {
        panic(err)
    }
    return t
}

// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
func MustParseBase(s string) Language {
    b, err := ParseBase(s)
    if err != nil {
        panic(err)
    }
    return b
}

// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
func MustParseScript(s string) Script {
    scr, err := ParseScript(s)
    if err != nil {
        panic(err)
    }
    return scr
}

// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
func MustParseRegion(s string) Region {
    r, err := ParseRegion(s)
    if err != nil {
        panic(err)
    }
    return r
}

// Und is the root language.
var Und Tag
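
// Illustrative sketch (not part of the vendored file): the Must* helpers are
// meant for initializing package-level values from tags that are known to be
// valid at compile time; a malformed literal fails fast with a panic instead
// of requiring error handling at every use site.
var exampleEnglish = MustParse("en")
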
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
Normal file
@ -0,0 +1,100 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package tag contains functionality handling tags and related data.
package tag // import "golang.org/x/text/internal/tag"

import "sort"

// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string

// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
    return string(s[x*4 : x*4+4])
}

// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
func (s Index) Index(key []byte) int {
    n := len(key)
    // search the index of the first entry with an equal or higher value than
    // key in s.
    index := sort.Search(len(s)/4, func(i int) bool {
        return cmp(s[i*4:i*4+n], key) != -1
    })
    i := index * 4
    if cmp(s[i:i+len(key)], key) != 0 {
        return -1
    }
    return index
}

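// Illustrative sketch (not part of the vendored file): a three-entry Index.
// Each element occupies 4 bytes and entries must be sorted for the binary
// search in Index to work. Assumes "fmt" is imported here.
func exampleIndex() {
    s := Index("aaa\x00bbb\x00ccc\x00")
    fmt.Println(s.Index([]byte("bbb"))) // 1
    fmt.Println(s.Elem(1))              // "bbb\x00"
    fmt.Println(s.Index([]byte("abc"))) // -1 (not present)
}
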
// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
    if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
        return x
    }
    return -1
}

// cmp returns an integer comparing a and b lexicographically.
func cmp(a Index, b []byte) int {
    n := len(a)
    if len(b) < n {
        n = len(b)
    }
    for i, c := range b[:n] {
        switch {
        case a[i] > c:
            return 1
        case a[i] < c:
            return -1
        }
    }
    switch {
    case len(a) < len(b):
        return -1
    case len(a) > len(b):
        return 1
    }
    return 0
}

// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
    return cmp(Index(a), b)
}

// FixCase reformats b to the same pattern of cases as form.
// It returns false if string b is malformed.
func FixCase(form string, b []byte) bool {
    if len(form) != len(b) {
        return false
    }
    for i, c := range b {
        if form[i] <= 'Z' {
            if c >= 'a' {
                c -= 'z' - 'Z'
            }
            if c < 'A' || 'Z' < c {
                return false
            }
        } else {
            if c <= 'Z' {
                c += 'z' - 'Z'
            }
            if c < 'a' || 'z' < c {
                return false
            }
        }
        b[i] = c
    }
    return true
}
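
// Illustrative sketch (not part of the vendored file): FixCase rewrites b in
// place to match the case pattern of form. Assumes "fmt" is imported here.
func exampleFixCase() {
    b := []byte("laTN")
    ok := FixCase("Zzzz", b)                 // title-case pattern used for script codes
    fmt.Println(ok, string(b))               // true Latn
    fmt.Println(FixCase("ZZ", []byte("u1"))) // false: '1' is not a letter
}
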
58
vendor/golang.org/x/text/internal/triegen/compact.go
generated
vendored
Normal file
@ -0,0 +1,58 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package triegen

// This file defines Compacter and its implementations.

import "io"

// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
    // Size returns whether the Compacter could encode the given block as well
    // as its size in case it can. len(v) is always 64.
    Size(v []uint64) (sz int, ok bool)

    // Store stores the block using the Compacter's compression method.
    // It returns a handle with which the block can be retrieved.
    // len(v) is always 64.
    Store(v []uint64) uint32

    // Print writes the data structures associated to the given store to w.
    Print(w io.Writer) error

    // Handler returns the name of a function that gets called during trie
    // lookup for blocks generated by the Compacter. The function should be of
    // the form func (n uint32, b byte) uint64, where n is the index returned by
    // the Compacter's Store method and b is the last byte of the UTF-8
    // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
    // block.
    Handler() string
}

// simpleCompacter is the default Compacter used by builder. It implements a
// normal trie block.
type simpleCompacter builder

func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
    return blockSize * b.ValueSize, true
}

func (b *simpleCompacter) Store(v []uint64) uint32 {
    h := uint32(len(b.ValueBlocks) - blockOffset)
    b.ValueBlocks = append(b.ValueBlocks, v)
    return h
}

func (b *simpleCompacter) Print(io.Writer) error {
    // Structures are printed in print.go.
    return nil
}

func (b *simpleCompacter) Handler() string {
    panic("Handler should be special-cased for this Compacter")
}
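
// Illustrative sketch (not part of the vendored file): a hypothetical
// Compacter that encodes a block in zero bytes when every value in it is
// zero. Print emits the lookup function named by Handler; the generated
// lookupValue then calls it as lookupZero(n, b). It would be registered via
// the Compact option, e.g. Gen(w, "foo", tries, Compact(zeroCompacter{})).
type zeroCompacter struct{}

func (zeroCompacter) Size(v []uint64) (sz int, ok bool) {
    for _, x := range v {
        if x != 0 {
            return 0, false
        }
    }
    return 0, true // all-zero block: no table space needed
}

func (zeroCompacter) Store(v []uint64) uint32 { return 0 }

func (zeroCompacter) Print(w io.Writer) error {
    _, err := io.WriteString(w, "func lookupZero(n uint32, b byte) uint64 { return 0 }\n")
    return err
}

func (zeroCompacter) Handler() string { return "lookupZero" }
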
251
vendor/golang.org/x/text/internal/triegen/print.go
generated
vendored
Normal file
@ -0,0 +1,251 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package triegen

import (
    "bytes"
    "fmt"
    "io"
    "strings"
    "text/template"
)

// print writes all the data structures as well as the code necessary to use the
// trie to w.
func (b *builder) print(w io.Writer) error {
    b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
    b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
    b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
    b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
    b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize

    // If we only have one root trie, all starter blocks are at position 0 and
    // we can access the arrays directly.
    if len(b.Trie) == 1 {
        // At this point we cannot refer to the generated tables directly.
        b.ASCIIBlock = b.Name + "Values"
        b.StarterBlock = b.Name + "Index"
    } else {
        // Otherwise we need to have explicit starter indexes in the trie
        // structure.
        b.ASCIIBlock = "t.ascii"
        b.StarterBlock = "t.utf8Start"
    }

    b.SourceType = "[]byte"
    if err := lookupGen.Execute(w, b); err != nil {
        return err
    }

    b.SourceType = "string"
    if err := lookupGen.Execute(w, b); err != nil {
        return err
    }

    if err := trieGen.Execute(w, b); err != nil {
        return err
    }

    for _, c := range b.Compactions {
        if err := c.c.Print(w); err != nil {
            return err
        }
    }

    return nil
}

func printValues(n int, values []uint64) string {
    w := &bytes.Buffer{}
    boff := n * blockSize
    fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
    var newline bool
    for i, v := range values {
        if i%6 == 0 {
            newline = true
        }
        if v != 0 {
            if newline {
                fmt.Fprintf(w, "\n")
                newline = false
            }
            fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
        }
    }
    return w.String()
}

func printIndex(b *builder, nr int, n *node) string {
    w := &bytes.Buffer{}
    boff := nr * blockSize
    fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
    var newline bool
    for i, c := range n.children {
        if i%8 == 0 {
            newline = true
        }
        if c != nil {
            v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
            if v != 0 {
                if newline {
                    fmt.Fprintf(w, "\n")
                    newline = false
                }
                fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
            }
        }
    }
    return w.String()
}

var (
    trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
        "printValues": printValues,
        "printIndex":  printIndex,
        "title":       strings.Title,
        "dec":         func(x int) int { return x - 1 },
        "psize": func(n int) string {
            return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
        },
    }).Parse(trieTemplate))
    lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
)

// TODO: consider the return type of lookup. It could be uint64, even if the
// internal value type is smaller. We will have to verify this with the
// performance of unicode/norm, which is very sensitive to such changes.
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
type {{.Name}}Trie struct { {{if $multi}}
    ascii []{{.ValueType}} // index for ASCII bytes
    utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
{{end}}}

func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
    h := {{.Name}}TrieHandles[i]
    return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
}

type {{.Name}}TrieHandle struct {
    ascii, multi {{.IndexType}}
}

// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
{{range .Trie}}    { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
{{end}}}{{else}}
    return &{{.Name}}Trie{}
}
{{end}}
// lookupValue determines the type of block n and looks up the value for b.
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
    switch { {{range $i, $c := .Compactions}}
    {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
        n -= {{$c.Offset}}{{end}}
        return {{print $b.ValueType}}({{$c.Handler}}){{end}}
    }
}

// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
// The third block is the zero block.
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
{{end}}}

// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
// Block 0 is the zero block.
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
{{end}}}
`

// TODO: consider allowing zero-length strings after evaluating performance with
// unicode/norm.
const lookupTemplate = `
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
    c0 := s[0]
    switch {
    case c0 < 0x80: // is ASCII
        return {{.ASCIIBlock}}[c0], 1
    case c0 < 0xC2:
        return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
    case c0 < 0xE0: // 2-byte UTF-8
        if len(s) < 2 {
            return 0, 0
        }
        i := {{.StarterBlock}}[c0]
        c1 := s[1]
        if c1 < 0x80 || 0xC0 <= c1 {
            return 0, 1 // Illegal UTF-8: not a continuation byte.
        }
        return t.lookupValue(uint32(i), c1), 2
    case c0 < 0xF0: // 3-byte UTF-8
        if len(s) < 3 {
            return 0, 0
        }
        i := {{.StarterBlock}}[c0]
        c1 := s[1]
        if c1 < 0x80 || 0xC0 <= c1 {
            return 0, 1 // Illegal UTF-8: not a continuation byte.
        }
        o := uint32(i)<<6 + uint32(c1)
        i = {{.Name}}Index[o]
        c2 := s[2]
        if c2 < 0x80 || 0xC0 <= c2 {
            return 0, 2 // Illegal UTF-8: not a continuation byte.
        }
        return t.lookupValue(uint32(i), c2), 3
    case c0 < 0xF8: // 4-byte UTF-8
        if len(s) < 4 {
            return 0, 0
        }
        i := {{.StarterBlock}}[c0]
        c1 := s[1]
        if c1 < 0x80 || 0xC0 <= c1 {
            return 0, 1 // Illegal UTF-8: not a continuation byte.
        }
        o := uint32(i)<<6 + uint32(c1)
        i = {{.Name}}Index[o]
        c2 := s[2]
        if c2 < 0x80 || 0xC0 <= c2 {
            return 0, 2 // Illegal UTF-8: not a continuation byte.
        }
        o = uint32(i)<<6 + uint32(c2)
        i = {{.Name}}Index[o]
        c3 := s[3]
        if c3 < 0x80 || 0xC0 <= c3 {
            return 0, 3 // Illegal UTF-8: not a continuation byte.
        }
        return t.lookupValue(uint32(i), c3), 4
    }
    // Illegal rune
    return 0, 1
}

// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
    c0 := s[0]
    if c0 < 0x80 { // is ASCII
        return {{.ASCIIBlock}}[c0]
    }
    i := {{.StarterBlock}}[c0]
    if c0 < 0xE0 { // 2-byte UTF-8
        return t.lookupValue(uint32(i), s[1])
    }
    i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
    if c0 < 0xF0 { // 3-byte UTF-8
        return t.lookupValue(uint32(i), s[2])
    }
    i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
    if c0 < 0xF8 { // 4-byte UTF-8
        return t.lookupValue(uint32(i), s[3])
    }
    return 0
}
`
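
// Illustrative sketch (not part of the vendored file): the block arithmetic
// used by the generated lookup functions above. A continuation byte c lies in
// [0x80, 0xC0), so uint32(i)<<6 + uint32(c) selects entry c&0x3F within the
// 64-entry block i (once the fixed block offsets are folded into i).
func exampleBlockArithmetic() {
    s := []byte("あ") // U+3042 encodes as E3 81 82
    fmt.Printf("% X -> offsets within blocks: %#02x %#02x\n",
        s, s[1]&0x3F, s[2]&0x3F) // E3 81 82 -> 0x01 0x02
}
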
494
vendor/golang.org/x/text/internal/triegen/triegen.go
generated
vendored
Normal file
@ -0,0 +1,494 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information. A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
//   - type fooTrie
//     The trie type.
//
//   - func newFooTrie(x int) *fooTrie
//     Trie constructor, where x is the index of the trie passed to Gen.
//
//   - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
//     The lookup method, where uintX is automatically chosen.
//
//   - func lookupString, lookupUnsafe and lookupStringUnsafe
//     Variants of the above.
//
//   - var fooValues and fooIndex and any tables generated by Compacters.
//     The core trie data.
//
//   - var fooTrieHandles
//     Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"

// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.

import (
    "encoding/binary"
    "fmt"
    "hash/crc64"
    "io"
    "log"
    "unicode/utf8"
)

// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
type builder struct {
    Name string

    // ValueType is the type of the trie values looked up.
    ValueType string

    // ValueSize is the byte size of the ValueType.
    ValueSize int

    // IndexType is the type of trie index values used for all UTF-8 bytes of
    // a rune except the last one.
    IndexType string

    // IndexSize is the byte size of the IndexType.
    IndexSize int

    // SourceType is used when generating the lookup functions. If the user
    // requests StringSupport, all lookup functions will be generated for
    // string input as well.
    SourceType string

    Trie []*Trie

    IndexBlocks []*node
    ValueBlocks [][]uint64
    Compactions []compaction
    Checksum    uint64

    ASCIIBlock   string
    StarterBlock string

    indexBlockIdx map[uint64]int
    valueBlockIdx map[uint64]nodeIndex
    asciiBlockIdx map[uint64]int

    // Stats are used to fill out the template.
    Stats struct {
        NValueEntries int
        NValueBytes   int
        NIndexEntries int
        NIndexBytes   int
        NHandleBytes  int
    }

    err error
}

// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
    compaction int
    index      int
}

// compaction keeps track of stats used for the compaction.
type compaction struct {
    c         Compacter
    blocks    []*node
    maxHandle uint32
    totalSize int

    // Used by template-based generator and thus exported.
    Cutoff  uint32
    Offset  uint32
    Handler string
}

func (b *builder) setError(err error) {
    if b.err == nil {
        b.err = err
    }
}

// An Option can be passed to Gen.
type Option func(b *builder) error

// Compact configures the trie generator to use the given Compacter.
func Compact(c Compacter) Option {
    return func(b *builder) error {
        b.Compactions = append(b.Compactions, compaction{
            c:       c,
            Handler: c.Handler() + "(n, b)"})
        return nil
    }
}

// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
    // The index contains two dummy blocks, followed by the zero block. The zero
    // block is at offset 0x80, so that the offset for the zero block for
    // continuation bytes is 0.
    b := &builder{
        Name:        name,
        Trie:        tries,
        IndexBlocks: []*node{{}, {}, {}},
        Compactions: []compaction{{
            Handler: name + "Values[n<<6+uint32(b)]",
        }},
        // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
        // block.
        indexBlockIdx: map[uint64]int{0: 0},
        valueBlockIdx: map[uint64]nodeIndex{0: {}},
        asciiBlockIdx: map[uint64]int{},
    }
    b.Compactions[0].c = (*simpleCompacter)(b)

    for _, f := range opts {
        if err := f(b); err != nil {
            return 0, err
        }
    }
    b.build()
    if b.err != nil {
        return 0, b.err
    }
    if err = b.print(w); err != nil {
        return 0, err
    }
    return b.Size(), nil
}

// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
    root *node

    hiddenTrie
}

// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
type hiddenTrie struct {
    Name         string
    Checksum     uint64
    ASCIIIndex   int
    StarterIndex int
}

// NewTrie returns a new trie root.
func NewTrie(name string) *Trie {
    return &Trie{
        &node{
            children: make([]*node, blockSize),
            values:   make([]uint64, utf8.RuneSelf),
        },
        hiddenTrie{Name: name},
    }
}

// Gen is a convenience wrapper around the Gen func passing t as the only trie
// and uses the name passed to NewTrie. It returns the size of the generated
// tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
    return Gen(w, t.Name, []*Trie{t}, opts...)
}

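// Illustrative sketch (not part of the vendored file): typical use from a
// generator program. Build a trie, insert a few values, and emit the Go
// source for its tables. Assumes "bytes" is imported here in addition to the
// imports above.
func exampleGen() {
    t := NewTrie("foo")
    t.Insert('a', 1)      // ASCII value, stored in the root block
    t.Insert('\u3042', 2) // 3-byte UTF-8, stored in a leaf block
    var buf bytes.Buffer
    sz, err := t.Gen(&buf)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("generated %d bytes of tables\n", sz)
}
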
// node is a node of the intermediate trie structure.
type node struct {
    // children holds this node's children. It is always of length 64.
    // A child node may be nil.
    children []*node

    // values contains the values of this node. If it is non-nil, this node is
    // either a root or leaf node:
    // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
    // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
    values []uint64

    index nodeIndex
}

// Insert associates value with the given rune. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
    if value == 0 {
        return
    }
    s := string(r)
    if []rune(s)[0] != r && value != 0 {
        // Note: The UCD tables will always assign what amounts to a zero value
        // to a surrogate. Allowing a zero value for an illegal rune allows
        // users to iterate over [0..MaxRune] without having to explicitly
        // exclude surrogates, which would be tedious.
        panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
    }
    if len(s) == 1 {
        // It is a root node value (ASCII).
        t.root.values[s[0]] = value
        return
    }

    n := t.root
    for ; len(s) > 1; s = s[1:] {
        if n.children == nil {
            n.children = make([]*node, blockSize)
        }
        p := s[0] % blockSize
        c := n.children[p]
        if c == nil {
            c = &node{}
            n.children[p] = c
        }
        if len(s) > 2 && c.values != nil {
            log.Fatalf("triegen: insert(%U): found internal node with values", r)
        }
        n = c
    }
    if n.values == nil {
        n.values = make([]uint64, blockSize)
    }
    if n.children != nil {
        log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
    }
    n.values[s[0]-0x80] = value
}

// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
    // Index blocks.
    sz := len(b.IndexBlocks) * blockSize * b.IndexSize

    // Skip the first compaction, which represents the normal value blocks, as
    // its totalSize does not account for the ASCII blocks, which are managed
    // separately.
    sz += len(b.ValueBlocks) * blockSize * b.ValueSize
    for _, c := range b.Compactions[1:] {
        sz += c.totalSize
    }

    // TODO: this computation does not account for the fixed overhead of using
    // a compaction, either code or data. As for data, though, the typical
    // overhead of data is in the order of bytes (2 bytes for cases). Further,
    // the savings of using a compaction should anyway be substantial for it to
    // be worth it.

    // For multi-root tries, we also need to account for the handles.
    if len(b.Trie) > 1 {
        sz += 2 * b.IndexSize * len(b.Trie)
    }
    return sz
}

func (b *builder) build() {
    // Compute the sizes of the values.
    var vmax uint64
    for _, t := range b.Trie {
        vmax = maxValue(t.root, vmax)
    }
    b.ValueType, b.ValueSize = getIntType(vmax)

    // Compute all block allocations.
    // TODO: first compute the ASCII blocks for all tries and then the other
    // nodes. ASCII blocks are more restricted in placement, as they require two
    // blocks to be placed consecutively. Processing them first may improve
    // sharing (at least one zero block can be expected to be saved.)
    for _, t := range b.Trie {
        b.Checksum += b.buildTrie(t)
    }

    // Compute the offsets for all the Compacters.
    offset := uint32(0)
    for i := range b.Compactions {
        c := &b.Compactions[i]
        c.Offset = offset
        offset += c.maxHandle + 1
        c.Cutoff = offset
    }

    // Compute the sizes of indexes.
    // TODO: different byte positions could have different sizes. So far we have
    // not found a case where this is beneficial.
    imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
    for _, ib := range b.IndexBlocks {
        if x := uint64(ib.index.index); x > imax {
            imax = x
        }
    }
    b.IndexType, b.IndexSize = getIntType(imax)
}

func maxValue(n *node, max uint64) uint64 {
    if n == nil {
        return max
    }
    for _, c := range n.children {
        max = maxValue(c, max)
    }
    for _, v := range n.values {
        if max < v {
            max = v
        }
    }
    return max
}

func getIntType(v uint64) (string, int) {
    switch {
    case v < 1<<8:
        return "uint8", 1
    case v < 1<<16:
        return "uint16", 2
    case v < 1<<32:
        return "uint32", 4
    }
    return "uint64", 8
}

const (
    blockSize = 64

    // Subtract two blocks to offset 0x80, the first continuation byte.
    blockOffset = 2

    // Subtract three blocks to offset 0xC0, the first non-ASCII starter.
    rootBlockOffset = 3
)

var crcTable = crc64.MakeTable(crc64.ISO)

func (b *builder) buildTrie(t *Trie) uint64 {
    n := t.root

    // Get the ASCII offset. For the first trie, the ASCII block will be at
    // position 0.
    hasher := crc64.New(crcTable)
    binary.Write(hasher, binary.BigEndian, n.values)
    hash := hasher.Sum64()

    v, ok := b.asciiBlockIdx[hash]
    if !ok {
        v = len(b.ValueBlocks)
        b.asciiBlockIdx[hash] = v

        b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
        if v == 0 {
            // Add the zero block at position 2 so that it will be assigned a
            // zero reference in the lookup blocks.
            // TODO: always do this? This would allow us to remove a check from
            // the trie lookup, but at the expense of extra space. Analyze
            // performance for unicode/norm.
            b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
        }
    }
    t.ASCIIIndex = v

    // Compute remaining offsets.
    t.Checksum = b.computeOffsets(n, true)
    // We already subtracted the normal blockOffset from the index. Subtract the
    // difference for starter bytes.
    t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
    return t.Checksum
}

func (b *builder) computeOffsets(n *node, root bool) uint64 {
    // For the first trie, the root lookup block will be at position 3, which is
    // the offset for UTF-8 non-ASCII starter bytes.
    first := len(b.IndexBlocks) == rootBlockOffset
    if first {
        b.IndexBlocks = append(b.IndexBlocks, n)
    }

    // We special-case the cases where all values recursively are 0. This allows
    // for the use of a zero block to which all such values can be directed.
    hash := uint64(0)
    if n.children != nil || n.values != nil {
        hasher := crc64.New(crcTable)
        for _, c := range n.children {
            var v uint64
            if c != nil {
                v = b.computeOffsets(c, false)
            }
            binary.Write(hasher, binary.BigEndian, v)
        }
        binary.Write(hasher, binary.BigEndian, n.values)
        hash = hasher.Sum64()
    }

    if first {
        b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
    }

    // Compacters don't apply to internal nodes.
    if n.children != nil {
        v, ok := b.indexBlockIdx[hash]
        if !ok {
            v = len(b.IndexBlocks) - blockOffset
            b.IndexBlocks = append(b.IndexBlocks, n)
            b.indexBlockIdx[hash] = v
        }
        n.index = nodeIndex{0, v}
    } else {
        h, ok := b.valueBlockIdx[hash]
        if !ok {
            bestI, bestSize := 0, blockSize*b.ValueSize
            for i, c := range b.Compactions[1:] {
                if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
                    bestI, bestSize = i+1, sz
                }
            }
            c := &b.Compactions[bestI]
            c.totalSize += bestSize
            v := c.c.Store(n.values)
            if c.maxHandle < v {
                c.maxHandle = v
            }
            h = nodeIndex{bestI, int(v)}
            b.valueBlockIdx[hash] = h
        }
        n.index = h
    }
    return hash
}
371
vendor/golang.org/x/text/internal/ucd/ucd.go
generated
vendored
Normal file
@ -0,0 +1,371 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package ucd provides a parser for Unicode Character Database files, the
// format of which is defined in https://www.unicode.org/reports/tr44/. See
// https://www.unicode.org/Public/UCD/latest/ucd/ for example files.
//
// It currently does not support substitutions of missing fields.
package ucd // import "golang.org/x/text/internal/ucd"

import (
    "bufio"
    "errors"
    "fmt"
    "io"
    "log"
    "regexp"
    "strconv"
    "strings"
)

// UnicodeData.txt fields.
const (
    CodePoint = iota
    Name
    GeneralCategory
    CanonicalCombiningClass
    BidiClass
    DecompMapping
    DecimalValue
    DigitValue
    NumericValue
    BidiMirrored
    Unicode1Name
    ISOComment
    SimpleUppercaseMapping
    SimpleLowercaseMapping
    SimpleTitlecaseMapping
)

// Parse calls f for each entry in the given reader of a UCD file. It will close
// the reader upon return. It will call log.Fatal if any error occurred.
//
// This implements the most common usage pattern of using Parser.
func Parse(r io.ReadCloser, f func(p *Parser)) {
    defer r.Close()

    p := New(r)
    for p.Next() {
        f(p)
    }
    if err := p.Err(); err != nil {
        r.Close() // os.Exit will cause defers not to be called.
        log.Fatal(err)
    }
}

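// Illustrative sketch (not part of the vendored file): the usage pattern the
// comment above describes. The file name is an assumption; any reader of a
// UCD file works. Assumes "os" is imported here.
func exampleParse() {
    f, err := os.Open("UnicodeData.txt")
    if err != nil {
        log.Fatal(err)
    }
    var letters int
    Parse(f, func(p *Parser) {
        // Field 2 of UnicodeData.txt is the general category.
        if strings.HasPrefix(p.String(GeneralCategory), "L") {
            letters++
        }
    })
    fmt.Println("letters:", letters)
}
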
// An Option is used to configure a Parser.
type Option func(p *Parser)

func keepRanges(p *Parser) {
    p.keepRanges = true
}

var (
    // KeepRanges prevents the expansion of ranges. The raw ranges can be
    // obtained by calling Range(0) on the parser.
    KeepRanges Option = keepRanges
)

// The Part option registers a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
    return func(p *Parser) {
        p.partHandler = f
    }
}

// The CommentHandler option passes comments that are on a line by themselves
// to a given handler.
func CommentHandler(f func(s string)) Option {
    return func(p *Parser) {
        p.commentHandler = f
    }
}

// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
    scanner *bufio.Scanner

    keepRanges bool // Don't expand rune ranges in field 0.

    err     error
    comment string
    field   []string
    // parsedRange is needed in case Range(0) is called more than once for one
    // field. In some cases this requires scanning ahead.
    line                 int
    parsedRange          bool
    rangeStart, rangeEnd rune

    partHandler    func(p *Parser)
    commentHandler func(s string)
}

func (p *Parser) setError(err error, msg string) {
    if p.err == nil && err != nil {
        if msg == "" {
            p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
        } else {
            p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
        }
    }
}

func (p *Parser) getField(i int) string {
    if i >= len(p.field) {
        return ""
    }
    return p.field[i]
}

// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
    return p.err
}

// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
    p := &Parser{
        scanner: bufio.NewScanner(r),
    }
    for _, f := range o {
        f(p)
    }
    return p
}

// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file.
func (p *Parser) Next() bool {
    if !p.keepRanges && p.rangeStart < p.rangeEnd {
        p.rangeStart++
        return true
    }
    p.comment = ""
    p.field = p.field[:0]
    p.parsedRange = false

    for p.scanner.Scan() && p.err == nil {
        p.line++
        s := p.scanner.Text()
        if s == "" {
            continue
        }
        if s[0] == '#' {
            if p.commentHandler != nil {
                p.commentHandler(strings.TrimSpace(s[1:]))
            }
            continue
        }

        // Parse line
        if i := strings.IndexByte(s, '#'); i != -1 {
            p.comment = strings.TrimSpace(s[i+1:])
            s = s[:i]
        }
        if s[0] == '@' {
            if p.partHandler != nil {
                p.field = append(p.field, strings.TrimSpace(s[1:]))
                p.partHandler(p)
                p.field = p.field[:0]
            }
            p.comment = ""
            continue
        }
        for {
            i := strings.IndexByte(s, ';')
            if i == -1 {
                p.field = append(p.field, strings.TrimSpace(s))
                break
            }
            p.field = append(p.field, strings.TrimSpace(s[:i]))
            s = s[i+1:]
        }
        if !p.keepRanges {
            p.rangeStart, p.rangeEnd = p.getRange(0)
        }
        return true
    }
    p.setError(p.scanner.Err(), "scanner failed")
    return false
}

func parseRune(b string) (rune, error) {
    if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
        b = b[2:]
    }
    x, err := strconv.ParseUint(b, 16, 32)
    return rune(x), err
}

func (p *Parser) parseRune(s string) rune {
    x, err := parseRune(s)
    p.setError(err, "failed to parse rune")
    return x
}

// Rune parses and returns field i as a rune.
func (p *Parser) Rune(i int) rune {
    if i > 0 || p.keepRanges {
        return p.parseRune(p.getField(i))
    }
    return p.rangeStart
}

// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
    add := func(s string) {
        if s = strings.TrimSpace(s); len(s) > 0 {
            runes = append(runes, p.parseRune(s))
        }
    }
    for b := p.getField(i); ; {
        i := strings.IndexByte(b, ' ')
        if i == -1 {
            add(b)
            break
        }
        add(b[:i])
        b = b[i+1:]
    }
    return
}

var (
    errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")

    // reRange matches one line of a legacy rune range.
    reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)

// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
func (p *Parser) Range(i int) (first, last rune) {
    if !p.keepRanges {
        return p.rangeStart, p.rangeStart
    }
    return p.getRange(i)
}

func (p *Parser) getRange(i int) (first, last rune) {
    b := p.getField(i)
    if k := strings.Index(b, ".."); k != -1 {
        return p.parseRune(b[:k]), p.parseRune(b[k+2:])
    }
    // The first field may not be a rune, in which case we may ignore any error
    // and set the range as 0..0.
    x, err := parseRune(b)
    if err != nil {
        // Disable range parsing henceforth. This ensures that an error will be
        // returned if the user subsequently will try to parse this field as
        // a Rune.
        p.keepRanges = true
    }
    // Special case for UnicodeData that was retained for backwards compatibility.
    if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
        if p.parsedRange {
            return p.rangeStart, p.rangeEnd
        }
        mf := reRange.FindStringSubmatch(p.scanner.Text())
        p.line++
        if mf == nil || !p.scanner.Scan() {
            p.setError(errIncorrectLegacyRange, "")
            return x, x
        }
        // Using Bytes would be more efficient here, but Text is a lot easier
        // and this is not a frequent case.
        ml := reRange.FindStringSubmatch(p.scanner.Text())
        if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
            p.setError(errIncorrectLegacyRange, "")
            return x, x
        }
        p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
        p.parsedRange = true
        return p.rangeStart, p.rangeEnd
    }
    return x, x
}

// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
    "":      false,
    "N":     false,
    "No":    false,
    "F":     false,
    "False": false,
    "Y":     true,
    "Yes":   true,
    "T":     true,
    "True":  true,
}

// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
    f := p.getField(i)
    for s, v := range bools {
        if f == s {
            return v
        }
    }
    p.setError(strconv.ErrSyntax, "error parsing bool")
    return false
}

// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
    x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
    p.setError(err, "error parsing int")
    return int(x)
}

// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
    x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
    p.setError(err, "error parsing uint")
    return uint(x)
}

// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
    x, err := strconv.ParseFloat(string(p.getField(i)), 64)
    p.setError(err, "error parsing float")
    return x
}

// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
    return string(p.getField(i))
}

// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
    ss := strings.Split(string(p.getField(i)), " ")
    for i, s := range ss {
        ss[i] = strings.TrimSpace(s)
    }
    return ss
}

// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
    return string(p.comment)
}

var errUndefinedEnum = errors.New("ucd: undefined enum value")

// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
    f := p.getField(i)
    for _, s := range enum {
        if f == s {
            return s
        }
    }
    p.setError(errUndefinedEnum, "error parsing enum")
    return ""
}