| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564 |
- // Package hdrhistogram provides an implementation of Gil Tene's HDR Histogram
- // data structure. The HDR Histogram allows for fast and accurate analysis of
- // the extreme ranges of data with non-normal distributions, like latency.
- package hdrhistogram
- import (
- "fmt"
- "math"
- )
- // A Bracket is a part of a cumulative distribution.
- type Bracket struct {
- Quantile float64
- Count, ValueAt int64
- }
- // A Snapshot is an exported view of a Histogram, useful for serializing them.
- // A Histogram can be constructed from it by passing it to Import.
- type Snapshot struct {
- LowestTrackableValue int64
- HighestTrackableValue int64
- SignificantFigures int64
- Counts []int64
- }
- // A Histogram is a lossy data structure used to record the distribution of
- // non-normally distributed data (like latency) with a high degree of accuracy
- // and a bounded degree of precision.
- type Histogram struct {
- lowestTrackableValue int64
- highestTrackableValue int64
- unitMagnitude int64
- significantFigures int64
- subBucketHalfCountMagnitude int32
- subBucketHalfCount int32
- subBucketMask int64
- subBucketCount int32
- bucketCount int32
- countsLen int32
- totalCount int64
- counts []int64
- }
- // New returns a new Histogram instance capable of tracking values in the given
- // range and with the given amount of precision.
- func New(minValue, maxValue int64, sigfigs int) *Histogram {
- if sigfigs < 1 || 5 < sigfigs {
- panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
- }
- largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
- subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))
- subBucketHalfCountMagnitude := subBucketCountMagnitude
- if subBucketHalfCountMagnitude < 1 {
- subBucketHalfCountMagnitude = 1
- }
- subBucketHalfCountMagnitude--
- unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
- if unitMagnitude < 0 {
- unitMagnitude = 0
- }
- subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1))
- subBucketHalfCount := subBucketCount / 2
- subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude)
- // determine exponent range needed to support the trackable value with no
- // overflow:
- smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
- bucketsNeeded := int32(1)
- for smallestUntrackableValue < maxValue {
- smallestUntrackableValue <<= 1
- bucketsNeeded++
- }
- bucketCount := bucketsNeeded
- countsLen := (bucketCount + 1) * (subBucketCount / 2)
- return &Histogram{
- lowestTrackableValue: minValue,
- highestTrackableValue: maxValue,
- unitMagnitude: int64(unitMagnitude),
- significantFigures: int64(sigfigs),
- subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
- subBucketHalfCount: subBucketHalfCount,
- subBucketMask: subBucketMask,
- subBucketCount: subBucketCount,
- bucketCount: bucketCount,
- countsLen: countsLen,
- totalCount: 0,
- counts: make([]int64, countsLen),
- }
- }
- // ByteSize returns an estimate of the amount of memory allocated to the
- // histogram in bytes.
- //
- // N.B.: This does not take into account the overhead for slices, which are
- // small, constant, and specific to the compiler version.
- func (h *Histogram) ByteSize() int {
- return 6*8 + 5*4 + len(h.counts)*8
- }
- // Merge merges the data stored in the given histogram with the receiver,
- // returning the number of recorded values which had to be dropped.
- func (h *Histogram) Merge(from *Histogram) (dropped int64) {
- i := from.rIterator()
- for i.next() {
- v := i.valueFromIdx
- c := i.countAtIdx
- if h.RecordValues(v, c) != nil {
- dropped += c
- }
- }
- return
- }
- // TotalCount returns total number of values recorded.
- func (h *Histogram) TotalCount() int64 {
- return h.totalCount
- }
- // Max returns the approximate maximum recorded value.
- func (h *Histogram) Max() int64 {
- var max int64
- i := h.iterator()
- for i.next() {
- if i.countAtIdx != 0 {
- max = i.highestEquivalentValue
- }
- }
- return h.highestEquivalentValue(max)
- }
- // Min returns the approximate minimum recorded value.
- func (h *Histogram) Min() int64 {
- var min int64
- i := h.iterator()
- for i.next() {
- if i.countAtIdx != 0 && min == 0 {
- min = i.highestEquivalentValue
- break
- }
- }
- return h.lowestEquivalentValue(min)
- }
- // Mean returns the approximate arithmetic mean of the recorded values.
- func (h *Histogram) Mean() float64 {
- if h.totalCount == 0 {
- return 0
- }
- var total int64
- i := h.iterator()
- for i.next() {
- if i.countAtIdx != 0 {
- total += i.countAtIdx * h.medianEquivalentValue(i.valueFromIdx)
- }
- }
- return float64(total) / float64(h.totalCount)
- }
- // StdDev returns the approximate standard deviation of the recorded values.
- func (h *Histogram) StdDev() float64 {
- if h.totalCount == 0 {
- return 0
- }
- mean := h.Mean()
- geometricDevTotal := 0.0
- i := h.iterator()
- for i.next() {
- if i.countAtIdx != 0 {
- dev := float64(h.medianEquivalentValue(i.valueFromIdx)) - mean
- geometricDevTotal += (dev * dev) * float64(i.countAtIdx)
- }
- }
- return math.Sqrt(geometricDevTotal / float64(h.totalCount))
- }
- // Reset deletes all recorded values and restores the histogram to its original
- // state.
- func (h *Histogram) Reset() {
- h.totalCount = 0
- for i := range h.counts {
- h.counts[i] = 0
- }
- }
- // RecordValue records the given value, returning an error if the value is out
- // of range.
- func (h *Histogram) RecordValue(v int64) error {
- return h.RecordValues(v, 1)
- }
- // RecordCorrectedValue records the given value, correcting for stalls in the
- // recording process. This only works for processes which are recording values
- // at an expected interval (e.g., doing jitter analysis). Processes which are
- // recording ad-hoc values (e.g., latency for incoming requests) can't take
- // advantage of this.
- func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error {
- if err := h.RecordValue(v); err != nil {
- return err
- }
- if expectedInterval <= 0 || v <= expectedInterval {
- return nil
- }
- missingValue := v - expectedInterval
- for missingValue >= expectedInterval {
- if err := h.RecordValue(missingValue); err != nil {
- return err
- }
- missingValue -= expectedInterval
- }
- return nil
- }
- // RecordValues records n occurrences of the given value, returning an error if
- // the value is out of range.
- func (h *Histogram) RecordValues(v, n int64) error {
- idx := h.countsIndexFor(v)
- if idx < 0 || int(h.countsLen) <= idx {
- return fmt.Errorf("value %d is too large to be recorded", v)
- }
- h.counts[idx] += n
- h.totalCount += n
- return nil
- }
- // ValueAtQuantile returns the recorded value at the given quantile (0..100).
- func (h *Histogram) ValueAtQuantile(q float64) int64 {
- if q > 100 {
- q = 100
- }
- total := int64(0)
- countAtPercentile := int64(((q / 100) * float64(h.totalCount)) + 0.5)
- i := h.iterator()
- for i.next() {
- total += i.countAtIdx
- if total >= countAtPercentile {
- return h.highestEquivalentValue(i.valueFromIdx)
- }
- }
- return 0
- }
- // CumulativeDistribution returns an ordered list of brackets of the
- // distribution of recorded values.
- func (h *Histogram) CumulativeDistribution() []Bracket {
- var result []Bracket
- i := h.pIterator(1)
- for i.next() {
- result = append(result, Bracket{
- Quantile: i.percentile,
- Count: i.countToIdx,
- ValueAt: i.highestEquivalentValue,
- })
- }
- return result
- }
- // SignificantFigures returns the significant figures used to create the
- // histogram
- func (h *Histogram) SignificantFigures() int64 {
- return h.significantFigures
- }
- // LowestTrackableValue returns the lower bound on values that will be added
- // to the histogram
- func (h *Histogram) LowestTrackableValue() int64 {
- return h.lowestTrackableValue
- }
- // HighestTrackableValue returns the upper bound on values that will be added
- // to the histogram
- func (h *Histogram) HighestTrackableValue() int64 {
- return h.highestTrackableValue
- }
- // Histogram bar for plotting
- type Bar struct {
- From, To, Count int64
- }
- // Pretty print as csv for easy plotting
- func (b Bar) String() string {
- return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count)
- }
- // Distribution returns an ordered list of bars of the
- // distribution of recorded values, counts can be normalized to a probability
- func (h *Histogram) Distribution() (result []Bar) {
- i := h.iterator()
- for i.next() {
- result = append(result, Bar{
- Count: i.countAtIdx,
- From: h.lowestEquivalentValue(i.valueFromIdx),
- To: i.highestEquivalentValue,
- })
- }
- return result
- }
- // Equals returns true if the two Histograms are equivalent, false if not.
- func (h *Histogram) Equals(other *Histogram) bool {
- switch {
- case
- h.lowestTrackableValue != other.lowestTrackableValue,
- h.highestTrackableValue != other.highestTrackableValue,
- h.unitMagnitude != other.unitMagnitude,
- h.significantFigures != other.significantFigures,
- h.subBucketHalfCountMagnitude != other.subBucketHalfCountMagnitude,
- h.subBucketHalfCount != other.subBucketHalfCount,
- h.subBucketMask != other.subBucketMask,
- h.subBucketCount != other.subBucketCount,
- h.bucketCount != other.bucketCount,
- h.countsLen != other.countsLen,
- h.totalCount != other.totalCount:
- return false
- default:
- for i, c := range h.counts {
- if c != other.counts[i] {
- return false
- }
- }
- }
- return true
- }
- // Export returns a snapshot view of the Histogram. This can be later passed to
- // Import to construct a new Histogram with the same state.
- func (h *Histogram) Export() *Snapshot {
- return &Snapshot{
- LowestTrackableValue: h.lowestTrackableValue,
- HighestTrackableValue: h.highestTrackableValue,
- SignificantFigures: h.significantFigures,
- Counts: append([]int64(nil), h.counts...), // copy
- }
- }
- // Import returns a new Histogram populated from the Snapshot data (which the
- // caller must stop accessing).
- func Import(s *Snapshot) *Histogram {
- h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures))
- h.counts = s.Counts
- totalCount := int64(0)
- for i := int32(0); i < h.countsLen; i++ {
- countAtIndex := h.counts[i]
- if countAtIndex > 0 {
- totalCount += countAtIndex
- }
- }
- h.totalCount = totalCount
- return h
- }
- func (h *Histogram) iterator() *iterator {
- return &iterator{
- h: h,
- subBucketIdx: -1,
- }
- }
- func (h *Histogram) rIterator() *rIterator {
- return &rIterator{
- iterator: iterator{
- h: h,
- subBucketIdx: -1,
- },
- }
- }
- func (h *Histogram) pIterator(ticksPerHalfDistance int32) *pIterator {
- return &pIterator{
- iterator: iterator{
- h: h,
- subBucketIdx: -1,
- },
- ticksPerHalfDistance: ticksPerHalfDistance,
- }
- }
- func (h *Histogram) sizeOfEquivalentValueRange(v int64) int64 {
- bucketIdx := h.getBucketIndex(v)
- subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
- adjustedBucket := bucketIdx
- if subBucketIdx >= h.subBucketCount {
- adjustedBucket++
- }
- return int64(1) << uint(h.unitMagnitude+int64(adjustedBucket))
- }
- func (h *Histogram) valueFromIndex(bucketIdx, subBucketIdx int32) int64 {
- return int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude)
- }
- func (h *Histogram) lowestEquivalentValue(v int64) int64 {
- bucketIdx := h.getBucketIndex(v)
- subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
- return h.valueFromIndex(bucketIdx, subBucketIdx)
- }
- func (h *Histogram) nextNonEquivalentValue(v int64) int64 {
- return h.lowestEquivalentValue(v) + h.sizeOfEquivalentValueRange(v)
- }
- func (h *Histogram) highestEquivalentValue(v int64) int64 {
- return h.nextNonEquivalentValue(v) - 1
- }
- func (h *Histogram) medianEquivalentValue(v int64) int64 {
- return h.lowestEquivalentValue(v) + (h.sizeOfEquivalentValueRange(v) >> 1)
- }
- func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 {
- return h.counts[h.countsIndex(bucketIdx, subBucketIdx)]
- }
- func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
- bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude)
- offsetInBucket := subBucketIdx - h.subBucketHalfCount
- return bucketBaseIdx + offsetInBucket
- }
- func (h *Histogram) getBucketIndex(v int64) int32 {
- pow2Ceiling := bitLen(v | h.subBucketMask)
- return int32(pow2Ceiling - int64(h.unitMagnitude) -
- int64(h.subBucketHalfCountMagnitude+1))
- }
- func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 {
- return int32(v >> uint(int64(idx)+int64(h.unitMagnitude)))
- }
- func (h *Histogram) countsIndexFor(v int64) int {
- bucketIdx := h.getBucketIndex(v)
- subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
- return int(h.countsIndex(bucketIdx, subBucketIdx))
- }
- type iterator struct {
- h *Histogram
- bucketIdx, subBucketIdx int32
- countAtIdx, countToIdx, valueFromIdx int64
- highestEquivalentValue int64
- }
- func (i *iterator) next() bool {
- if i.countToIdx >= i.h.totalCount {
- return false
- }
- // increment bucket
- i.subBucketIdx++
- if i.subBucketIdx >= i.h.subBucketCount {
- i.subBucketIdx = i.h.subBucketHalfCount
- i.bucketIdx++
- }
- if i.bucketIdx >= i.h.bucketCount {
- return false
- }
- i.countAtIdx = i.h.getCountAtIndex(i.bucketIdx, i.subBucketIdx)
- i.countToIdx += i.countAtIdx
- i.valueFromIdx = i.h.valueFromIndex(i.bucketIdx, i.subBucketIdx)
- i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx)
- return true
- }
- type rIterator struct {
- iterator
- countAddedThisStep int64
- }
- func (r *rIterator) next() bool {
- for r.iterator.next() {
- if r.countAtIdx != 0 {
- r.countAddedThisStep = r.countAtIdx
- return true
- }
- }
- return false
- }
- type pIterator struct {
- iterator
- seenLastValue bool
- ticksPerHalfDistance int32
- percentileToIteratorTo float64
- percentile float64
- }
- func (p *pIterator) next() bool {
- if !(p.countToIdx < p.h.totalCount) {
- if p.seenLastValue {
- return false
- }
- p.seenLastValue = true
- p.percentile = 100
- return true
- }
- if p.subBucketIdx == -1 && !p.iterator.next() {
- return false
- }
- var done = false
- for !done {
- currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
- if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
- p.percentile = p.percentileToIteratorTo
- halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
- percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
- p.percentileToIteratorTo += 100.0 / percentileReportingTicks
- return true
- }
- done = !p.iterator.next()
- }
- return true
- }
- func bitLen(x int64) (n int64) {
- for ; x >= 0x8000; x >>= 16 {
- n += 16
- }
- if x >= 0x80 {
- x >>= 8
- n += 8
- }
- if x >= 0x8 {
- x >>= 4
- n += 4
- }
- if x >= 0x2 {
- x >>= 2
- n += 2
- }
- if x >= 0x1 {
- n++
- }
- return
- }
|