...

Source file src/github.com/beorn7/perks/histogram/histogram.go

Documentation: github.com/beorn7/perks/histogram

     1  // Package histogram provides a Go implementation of BigML's histogram package
     2  // for Clojure/Java. It is currently experimental.
     3  package histogram
     4  
     5  import (
     6  	"container/heap"
     7  	"math"
     8  	"sort"
     9  )
    10  
    11  type Bin struct {
    12  	Count int
    13  	Sum   float64
    14  }
    15  
    16  func (b *Bin) Update(x *Bin) {
    17  	b.Count += x.Count
    18  	b.Sum += x.Sum
    19  }
    20  
    21  func (b *Bin) Mean() float64 {
    22  	return b.Sum / float64(b.Count)
    23  }
    24  
    25  type Bins []*Bin
    26  
    27  func (bs Bins) Len() int           { return len(bs) }
    28  func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
    29  func (bs Bins) Swap(i, j int)      { bs[i], bs[j] = bs[j], bs[i] }
    30  
    31  func (bs *Bins) Push(x interface{}) {
    32  	*bs = append(*bs, x.(*Bin))
    33  }
    34  
    35  func (bs *Bins) Pop() interface{} {
    36  	return bs.remove(len(*bs) - 1)
    37  }
    38  
    39  func (bs *Bins) remove(n int) *Bin {
    40  	if n < 0 || len(*bs) < n {
    41  		return nil
    42  	}
    43  	x := (*bs)[n]
    44  	*bs = append((*bs)[:n], (*bs)[n+1:]...)
    45  	return x
    46  }
    47  
    48  type Histogram struct {
    49  	res *reservoir
    50  }
    51  
    52  func New(maxBins int) *Histogram {
    53  	return &Histogram{res: newReservoir(maxBins)}
    54  }
    55  
    56  func (h *Histogram) Insert(f float64) {
    57  	h.res.insert(&Bin{1, f})
    58  	h.res.compress()
    59  }
    60  
    61  func (h *Histogram) Bins() Bins {
    62  	return h.res.bins
    63  }
    64  
    65  type reservoir struct {
    66  	n       int
    67  	maxBins int
    68  	bins    Bins
    69  }
    70  
    71  func newReservoir(maxBins int) *reservoir {
    72  	return &reservoir{maxBins: maxBins}
    73  }
    74  
    75  func (r *reservoir) insert(bin *Bin) {
    76  	r.n += bin.Count
    77  	i := sort.Search(len(r.bins), func(i int) bool {
    78  		return r.bins[i].Mean() >= bin.Mean()
    79  	})
    80  	if i < 0 || i == r.bins.Len() {
    81  		// TODO(blake): Maybe use an .insert(i, bin) instead of
    82  		// performing the extra work of a heap.Push.
    83  		heap.Push(&r.bins, bin)
    84  		return
    85  	}
    86  	r.bins[i].Update(bin)
    87  }
    88  
    89  func (r *reservoir) compress() {
    90  	for r.bins.Len() > r.maxBins {
    91  		minGapIndex := -1
    92  		minGap := math.MaxFloat64
    93  		for i := 0; i < r.bins.Len()-1; i++ {
    94  			gap := gapWeight(r.bins[i], r.bins[i+1])
    95  			if minGap > gap {
    96  				minGap = gap
    97  				minGapIndex = i
    98  			}
    99  		}
   100  		prev := r.bins[minGapIndex]
   101  		next := r.bins.remove(minGapIndex + 1)
   102  		prev.Update(next)
   103  	}
   104  }
   105  
   106  func gapWeight(prev, next *Bin) float64 {
   107  	return next.Mean() - prev.Mean()
   108  }
   109  

View as plain text