Merge master into breakout_swarm

Signed-off-by: Alex Ellis <alexellis2@gmail.com>
This commit is contained in:
Alex Ellis
2018-02-01 09:25:39 +00:00
parent afeb7bbce4
commit f954bf0733
1953 changed files with 614131 additions and 175582 deletions

2
gateway/vendor/github.com/beorn7/perks/.gitignore generated vendored Normal file
View File

@ -0,0 +1,2 @@
*.test
*.prof

View File

@ -0,0 +1,26 @@
package histogram
import (
"math/rand"
"testing"
)
func BenchmarkInsert10Bins(b *testing.B) {
b.StopTimer()
h := New(10)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}
func BenchmarkInsert100Bins(b *testing.B) {
b.StopTimer()
h := New(100)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}

View File

@ -0,0 +1,108 @@
// Package histogram provides a Go implementation of BigML's histogram package
// for Clojure/Java. It is currently experimental.
package histogram
import (
"container/heap"
"math"
"sort"
)
type Bin struct {
Count int
Sum float64
}
func (b *Bin) Update(x *Bin) {
b.Count += x.Count
b.Sum += x.Sum
}
func (b *Bin) Mean() float64 {
return b.Sum / float64(b.Count)
}
type Bins []*Bin
func (bs Bins) Len() int { return len(bs) }
func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] }
func (bs *Bins) Push(x interface{}) {
*bs = append(*bs, x.(*Bin))
}
func (bs *Bins) Pop() interface{} {
return bs.remove(len(*bs) - 1)
}
func (bs *Bins) remove(n int) *Bin {
if n < 0 || len(*bs) < n {
return nil
}
x := (*bs)[n]
*bs = append((*bs)[:n], (*bs)[n+1:]...)
return x
}
type Histogram struct {
res *reservoir
}
func New(maxBins int) *Histogram {
return &Histogram{res: newReservoir(maxBins)}
}
func (h *Histogram) Insert(f float64) {
h.res.insert(&Bin{1, f})
h.res.compress()
}
func (h *Histogram) Bins() Bins {
return h.res.bins
}
type reservoir struct {
n int
maxBins int
bins Bins
}
func newReservoir(maxBins int) *reservoir {
return &reservoir{maxBins: maxBins}
}
func (r *reservoir) insert(bin *Bin) {
r.n += bin.Count
i := sort.Search(len(r.bins), func(i int) bool {
return r.bins[i].Mean() >= bin.Mean()
})
if i < 0 || i == r.bins.Len() {
// TODO(blake): Maybe use an .insert(i, bin) instead of
// performing the extra work of a heap.Push.
heap.Push(&r.bins, bin)
return
}
r.bins[i].Update(bin)
}
func (r *reservoir) compress() {
for r.bins.Len() > r.maxBins {
minGapIndex := -1
minGap := math.MaxFloat64
for i := 0; i < r.bins.Len()-1; i++ {
gap := gapWeight(r.bins[i], r.bins[i+1])
if minGap > gap {
minGap = gap
minGapIndex = i
}
}
prev := r.bins[minGapIndex]
next := r.bins.remove(minGapIndex + 1)
prev.Update(next)
}
}
func gapWeight(prev, next *Bin) float64 {
return next.Mean() - prev.Mean()
}

View File

@ -0,0 +1,38 @@
package histogram
import (
"math/rand"
"testing"
)
func TestHistogram(t *testing.T) {
const numPoints = 1e6
const maxBins = 3
h := New(maxBins)
for i := 0; i < numPoints; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
bins := h.Bins()
if g := len(bins); g > maxBins {
t.Fatalf("got %d bins, wanted <= %d", g, maxBins)
}
for _, b := range bins {
t.Logf("%+v", b)
}
if g := count(h.Bins()); g != numPoints {
t.Fatalf("binned %d points, wanted %d", g, numPoints)
}
}
func count(bins Bins) int {
binCounts := 0
for _, b := range bins {
binCounts += b.Count
}
return binCounts
}

View File

@ -0,0 +1,63 @@
package quantile
import (
"testing"
)
func BenchmarkInsertTargeted(b *testing.B) {
b.ReportAllocs()
s := NewTargeted(Targets)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
}
}
func BenchmarkInsertTargetedSmallEpsilon(b *testing.B) {
s := NewTargeted(TargetsSmallEpsilon)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
}
}
func BenchmarkInsertBiased(b *testing.B) {
s := NewLowBiased(0.01)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
}
}
func BenchmarkInsertBiasedSmallEpsilon(b *testing.B) {
s := NewLowBiased(0.0001)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
}
}
func BenchmarkQuery(b *testing.B) {
s := NewTargeted(Targets)
for i := float64(0); i < 1e6; i++ {
s.Insert(i)
}
b.ResetTimer()
n := float64(b.N)
for i := float64(0); i < n; i++ {
s.Query(i / n)
}
}
func BenchmarkQuerySmallEpsilon(b *testing.B) {
s := NewTargeted(TargetsSmallEpsilon)
for i := float64(0); i < 1e6; i++ {
s.Insert(i)
}
b.ResetTimer()
n := float64(b.N)
for i := float64(0); i < n; i++ {
s.Query(i / n)
}
}

View File

@ -0,0 +1,121 @@
// +build go1.1
package quantile_test
import (
"bufio"
"fmt"
"log"
"os"
"strconv"
"time"
"github.com/beorn7/perks/quantile"
)
func Example_simple() {
ch := make(chan float64)
go sendFloats(ch)
// Compute the 50th, 90th, and 99th percentile.
q := quantile.NewTargeted(map[float64]float64{
0.50: 0.005,
0.90: 0.001,
0.99: 0.0001,
})
for v := range ch {
q.Insert(v)
}
fmt.Println("perc50:", q.Query(0.50))
fmt.Println("perc90:", q.Query(0.90))
fmt.Println("perc99:", q.Query(0.99))
fmt.Println("count:", q.Count())
// Output:
// perc50: 5
// perc90: 16
// perc99: 223
// count: 2388
}
func Example_mergeMultipleStreams() {
// Scenario:
// We have multiple database shards. On each shard, there is a process
// collecting query response times from the database logs and inserting
// them into a Stream (created via NewTargeted(0.90)), much like the
// Simple example. These processes expose a network interface for us to
// ask them to serialize and send us the results of their
// Stream.Samples so we may Merge and Query them.
//
// NOTES:
// * These sample sets are small, allowing us to get them
// across the network much faster than sending the entire list of data
// points.
//
// * For this to work correctly, we must supply the same quantiles
// a priori the process collecting the samples supplied to NewTargeted,
// even if we do not plan to query them all here.
ch := make(chan quantile.Samples)
getDBQuerySamples(ch)
q := quantile.NewTargeted(map[float64]float64{0.90: 0.001})
for samples := range ch {
q.Merge(samples)
}
fmt.Println("perc90:", q.Query(0.90))
}
func Example_window() {
// Scenario: We want the 90th, 95th, and 99th percentiles for each
// minute.
ch := make(chan float64)
go sendStreamValues(ch)
tick := time.NewTicker(1 * time.Minute)
q := quantile.NewTargeted(map[float64]float64{
0.90: 0.001,
0.95: 0.0005,
0.99: 0.0001,
})
for {
select {
case t := <-tick.C:
flushToDB(t, q.Samples())
q.Reset()
case v := <-ch:
q.Insert(v)
}
}
}
func sendStreamValues(ch chan float64) {
// Use your imagination
}
func flushToDB(t time.Time, samples quantile.Samples) {
// Use your imagination
}
// This is a stub for the above example. In reality this would hit the remote
// servers via http or something like it.
func getDBQuerySamples(ch chan quantile.Samples) {}
func sendFloats(ch chan<- float64) {
f, err := os.Open("exampledata.txt")
if err != nil {
log.Fatal(err)
}
sc := bufio.NewScanner(f)
for sc.Scan() {
b := sc.Bytes()
v, err := strconv.ParseFloat(string(b), 64)
if err != nil {
log.Fatal(err)
}
ch <- v
}
if sc.Err() != nil {
log.Fatal(sc.Err())
}
close(ch)
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,215 @@
package quantile
import (
"math"
"math/rand"
"sort"
"testing"
)
var (
Targets = map[float64]float64{
0.01: 0.001,
0.10: 0.01,
0.50: 0.05,
0.90: 0.01,
0.99: 0.001,
}
TargetsSmallEpsilon = map[float64]float64{
0.01: 0.0001,
0.10: 0.001,
0.50: 0.005,
0.90: 0.001,
0.99: 0.0001,
}
LowQuantiles = []float64{0.01, 0.1, 0.5}
HighQuantiles = []float64{0.99, 0.9, 0.5}
)
const RelativeEpsilon = 0.01
func verifyPercsWithAbsoluteEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
for quantile, epsilon := range Targets {
n := float64(len(a))
k := int(quantile * n)
if k < 1 {
k = 1
}
lower := int((quantile - epsilon) * n)
if lower < 1 {
lower = 1
}
upper := int(math.Ceil((quantile + epsilon) * n))
if upper > len(a) {
upper = len(a)
}
w, min, max := a[k-1], a[lower-1], a[upper-1]
if g := s.Query(quantile); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", quantile, w, min, max, g)
}
}
}
func verifyLowPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
for _, qu := range LowQuantiles {
n := float64(len(a))
k := int(qu * n)
lowerRank := int((1 - RelativeEpsilon) * qu * n)
upperRank := int(math.Ceil((1 + RelativeEpsilon) * qu * n))
w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
if g := s.Query(qu); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
}
}
}
func verifyHighPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
for _, qu := range HighQuantiles {
n := float64(len(a))
k := int(qu * n)
lowerRank := int((1 - (1+RelativeEpsilon)*(1-qu)) * n)
upperRank := int(math.Ceil((1 - (1-RelativeEpsilon)*(1-qu)) * n))
w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
if g := s.Query(qu); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
}
}
}
func populateStream(s *Stream) []float64 {
a := make([]float64, 0, 1e5+100)
for i := 0; i < cap(a); i++ {
v := rand.NormFloat64()
// Add 5% asymmetric outliers.
if i%20 == 0 {
v = v*v + 1
}
s.Insert(v)
a = append(a, v)
}
return a
}
func TestTargetedQuery(t *testing.T) {
rand.Seed(42)
s := NewTargeted(Targets)
a := populateStream(s)
verifyPercsWithAbsoluteEpsilon(t, a, s)
}
func TestTargetedQuerySmallSampleSize(t *testing.T) {
rand.Seed(42)
s := NewTargeted(TargetsSmallEpsilon)
a := []float64{1, 2, 3, 4, 5}
for _, v := range a {
s.Insert(v)
}
verifyPercsWithAbsoluteEpsilon(t, a, s)
// If not yet flushed, results should be precise:
if !s.flushed() {
for φ, want := range map[float64]float64{
0.01: 1,
0.10: 1,
0.50: 3,
0.90: 5,
0.99: 5,
} {
if got := s.Query(φ); got != want {
t.Errorf("want %f for φ=%f, got %f", want, φ, got)
}
}
}
}
func TestLowBiasedQuery(t *testing.T) {
rand.Seed(42)
s := NewLowBiased(RelativeEpsilon)
a := populateStream(s)
verifyLowPercsWithRelativeEpsilon(t, a, s)
}
func TestHighBiasedQuery(t *testing.T) {
rand.Seed(42)
s := NewHighBiased(RelativeEpsilon)
a := populateStream(s)
verifyHighPercsWithRelativeEpsilon(t, a, s)
}
// BrokenTestTargetedMerge is broken, see Merge doc comment.
func BrokenTestTargetedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewTargeted(Targets)
s2 := NewTargeted(Targets)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyPercsWithAbsoluteEpsilon(t, a, s1)
}
// BrokenTestLowBiasedMerge is broken, see Merge doc comment.
func BrokenTestLowBiasedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewLowBiased(RelativeEpsilon)
s2 := NewLowBiased(RelativeEpsilon)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyLowPercsWithRelativeEpsilon(t, a, s2)
}
// BrokenTestHighBiasedMerge is broken, see Merge doc comment.
func BrokenTestHighBiasedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewHighBiased(RelativeEpsilon)
s2 := NewHighBiased(RelativeEpsilon)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyHighPercsWithRelativeEpsilon(t, a, s2)
}
func TestUncompressed(t *testing.T) {
q := NewTargeted(Targets)
for i := 100; i > 0; i-- {
q.Insert(float64(i))
}
if g := q.Count(); g != 100 {
t.Errorf("want count 100, got %d", g)
}
// Before compression, Query should have 100% accuracy.
for quantile := range Targets {
w := quantile * 100
if g := q.Query(quantile); g != w {
t.Errorf("want %f, got %f", w, g)
}
}
}
func TestUncompressedSamples(t *testing.T) {
q := NewTargeted(map[float64]float64{0.99: 0.001})
for i := 1; i <= 100; i++ {
q.Insert(float64(i))
}
if g := q.Samples().Len(); g != 100 {
t.Errorf("want count 100, got %d", g)
}
}
func TestUncompressedOne(t *testing.T) {
q := NewTargeted(map[float64]float64{0.99: 0.01})
q.Insert(3.14)
if g := q.Query(0.90); g != 3.14 {
t.Error("want PI, got", g)
}
}
func TestDefaults(t *testing.T) {
if g := NewTargeted(map[float64]float64{0.99: 0.001}).Query(0.99); g != 0 {
t.Errorf("want 0, got %f", g)
}
}

90
gateway/vendor/github.com/beorn7/perks/topk/topk.go generated vendored Normal file
View File

@ -0,0 +1,90 @@
package topk
import (
"sort"
)
// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf
type Element struct {
Value string
Count int
}
type Samples []*Element
func (sm Samples) Len() int {
return len(sm)
}
func (sm Samples) Less(i, j int) bool {
return sm[i].Count < sm[j].Count
}
func (sm Samples) Swap(i, j int) {
sm[i], sm[j] = sm[j], sm[i]
}
type Stream struct {
k int
mon map[string]*Element
// the minimum Element
min *Element
}
func New(k int) *Stream {
s := new(Stream)
s.k = k
s.mon = make(map[string]*Element)
s.min = &Element{}
// Track k+1 so that less frequenet items contended for that spot,
// resulting in k being more accurate.
return s
}
func (s *Stream) Insert(x string) {
s.insert(&Element{x, 1})
}
func (s *Stream) Merge(sm Samples) {
for _, e := range sm {
s.insert(e)
}
}
func (s *Stream) insert(in *Element) {
e := s.mon[in.Value]
if e != nil {
e.Count++
} else {
if len(s.mon) < s.k+1 {
e = &Element{in.Value, in.Count}
s.mon[in.Value] = e
} else {
e = s.min
delete(s.mon, e.Value)
e.Value = in.Value
e.Count += in.Count
s.min = e
}
}
if e.Count < s.min.Count {
s.min = e
}
}
func (s *Stream) Query() Samples {
var sm Samples
for _, e := range s.mon {
sm = append(sm, e)
}
sort.Sort(sort.Reverse(sm))
if len(sm) < s.k {
return sm
}
return sm[:s.k]
}

View File

@ -0,0 +1,57 @@
package topk
import (
"fmt"
"math/rand"
"sort"
"testing"
)
func TestTopK(t *testing.T) {
stream := New(10)
ss := []*Stream{New(10), New(10), New(10)}
m := make(map[string]int)
for _, s := range ss {
for i := 0; i < 1e6; i++ {
v := fmt.Sprintf("%x", int8(rand.ExpFloat64()))
s.Insert(v)
m[v]++
}
stream.Merge(s.Query())
}
var sm Samples
for x, s := range m {
sm = append(sm, &Element{x, s})
}
sort.Sort(sort.Reverse(sm))
g := stream.Query()
if len(g) != 10 {
t.Fatalf("got %d, want 10", len(g))
}
for i, e := range g {
if sm[i].Value != e.Value {
t.Errorf("at %d: want %q, got %q", i, sm[i].Value, e.Value)
}
}
}
func TestQuery(t *testing.T) {
queryTests := []struct {
value string
expected int
}{
{"a", 1},
{"b", 2},
{"c", 2},
}
stream := New(2)
for _, tt := range queryTests {
stream.Insert(tt.value)
if n := len(stream.Query()); n != tt.expected {
t.Errorf("want %d, got %d", tt.expected, n)
}
}
}