hdr.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. // Package hdrhistogram provides an implementation of Gil Tene's HDR Histogram
  2. // data structure. The HDR Histogram allows for fast and accurate analysis of
  3. // the extreme ranges of data with non-normal distributions, like latency.
  4. package hdrhistogram
  5. import (
  6. "fmt"
  7. "math"
  8. )
  9. // A Bracket is a part of a cumulative distribution.
  10. type Bracket struct {
  11. Quantile float64
  12. Count, ValueAt int64
  13. }
  14. // A Snapshot is an exported view of a Histogram, useful for serializing them.
  15. // A Histogram can be constructed from it by passing it to Import.
  16. type Snapshot struct {
  17. LowestTrackableValue int64
  18. HighestTrackableValue int64
  19. SignificantFigures int64
  20. Counts []int64
  21. }
  22. // A Histogram is a lossy data structure used to record the distribution of
  23. // non-normally distributed data (like latency) with a high degree of accuracy
  24. // and a bounded degree of precision.
  25. type Histogram struct {
  26. lowestTrackableValue int64
  27. highestTrackableValue int64
  28. unitMagnitude int64
  29. significantFigures int64
  30. subBucketHalfCountMagnitude int32
  31. subBucketHalfCount int32
  32. subBucketMask int64
  33. subBucketCount int32
  34. bucketCount int32
  35. countsLen int32
  36. totalCount int64
  37. counts []int64
  38. }
  39. // New returns a new Histogram instance capable of tracking values in the given
  40. // range and with the given amount of precision.
  41. func New(minValue, maxValue int64, sigfigs int) *Histogram {
  42. if sigfigs < 1 || 5 < sigfigs {
  43. panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
  44. }
  45. largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
  46. subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))
  47. subBucketHalfCountMagnitude := subBucketCountMagnitude
  48. if subBucketHalfCountMagnitude < 1 {
  49. subBucketHalfCountMagnitude = 1
  50. }
  51. subBucketHalfCountMagnitude--
  52. unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
  53. if unitMagnitude < 0 {
  54. unitMagnitude = 0
  55. }
  56. subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1))
  57. subBucketHalfCount := subBucketCount / 2
  58. subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude)
  59. // determine exponent range needed to support the trackable value with no
  60. // overflow:
  61. smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
  62. bucketsNeeded := int32(1)
  63. for smallestUntrackableValue < maxValue {
  64. smallestUntrackableValue <<= 1
  65. bucketsNeeded++
  66. }
  67. bucketCount := bucketsNeeded
  68. countsLen := (bucketCount + 1) * (subBucketCount / 2)
  69. return &Histogram{
  70. lowestTrackableValue: minValue,
  71. highestTrackableValue: maxValue,
  72. unitMagnitude: int64(unitMagnitude),
  73. significantFigures: int64(sigfigs),
  74. subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
  75. subBucketHalfCount: subBucketHalfCount,
  76. subBucketMask: subBucketMask,
  77. subBucketCount: subBucketCount,
  78. bucketCount: bucketCount,
  79. countsLen: countsLen,
  80. totalCount: 0,
  81. counts: make([]int64, countsLen),
  82. }
  83. }
  84. // ByteSize returns an estimate of the amount of memory allocated to the
  85. // histogram in bytes.
  86. //
  87. // N.B.: This does not take into account the overhead for slices, which are
  88. // small, constant, and specific to the compiler version.
  89. func (h *Histogram) ByteSize() int {
  90. return 6*8 + 5*4 + len(h.counts)*8
  91. }
  92. // Merge merges the data stored in the given histogram with the receiver,
  93. // returning the number of recorded values which had to be dropped.
  94. func (h *Histogram) Merge(from *Histogram) (dropped int64) {
  95. i := from.rIterator()
  96. for i.next() {
  97. v := i.valueFromIdx
  98. c := i.countAtIdx
  99. if h.RecordValues(v, c) != nil {
  100. dropped += c
  101. }
  102. }
  103. return
  104. }
  105. // TotalCount returns total number of values recorded.
  106. func (h *Histogram) TotalCount() int64 {
  107. return h.totalCount
  108. }
  109. // Max returns the approximate maximum recorded value.
  110. func (h *Histogram) Max() int64 {
  111. var max int64
  112. i := h.iterator()
  113. for i.next() {
  114. if i.countAtIdx != 0 {
  115. max = i.highestEquivalentValue
  116. }
  117. }
  118. return h.highestEquivalentValue(max)
  119. }
  120. // Min returns the approximate minimum recorded value.
  121. func (h *Histogram) Min() int64 {
  122. var min int64
  123. i := h.iterator()
  124. for i.next() {
  125. if i.countAtIdx != 0 && min == 0 {
  126. min = i.highestEquivalentValue
  127. break
  128. }
  129. }
  130. return h.lowestEquivalentValue(min)
  131. }
  132. // Mean returns the approximate arithmetic mean of the recorded values.
  133. func (h *Histogram) Mean() float64 {
  134. if h.totalCount == 0 {
  135. return 0
  136. }
  137. var total int64
  138. i := h.iterator()
  139. for i.next() {
  140. if i.countAtIdx != 0 {
  141. total += i.countAtIdx * h.medianEquivalentValue(i.valueFromIdx)
  142. }
  143. }
  144. return float64(total) / float64(h.totalCount)
  145. }
  146. // StdDev returns the approximate standard deviation of the recorded values.
  147. func (h *Histogram) StdDev() float64 {
  148. if h.totalCount == 0 {
  149. return 0
  150. }
  151. mean := h.Mean()
  152. geometricDevTotal := 0.0
  153. i := h.iterator()
  154. for i.next() {
  155. if i.countAtIdx != 0 {
  156. dev := float64(h.medianEquivalentValue(i.valueFromIdx)) - mean
  157. geometricDevTotal += (dev * dev) * float64(i.countAtIdx)
  158. }
  159. }
  160. return math.Sqrt(geometricDevTotal / float64(h.totalCount))
  161. }
  162. // Reset deletes all recorded values and restores the histogram to its original
  163. // state.
  164. func (h *Histogram) Reset() {
  165. h.totalCount = 0
  166. for i := range h.counts {
  167. h.counts[i] = 0
  168. }
  169. }
  170. // RecordValue records the given value, returning an error if the value is out
  171. // of range.
  172. func (h *Histogram) RecordValue(v int64) error {
  173. return h.RecordValues(v, 1)
  174. }
  175. // RecordCorrectedValue records the given value, correcting for stalls in the
  176. // recording process. This only works for processes which are recording values
  177. // at an expected interval (e.g., doing jitter analysis). Processes which are
  178. // recording ad-hoc values (e.g., latency for incoming requests) can't take
  179. // advantage of this.
  180. func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error {
  181. if err := h.RecordValue(v); err != nil {
  182. return err
  183. }
  184. if expectedInterval <= 0 || v <= expectedInterval {
  185. return nil
  186. }
  187. missingValue := v - expectedInterval
  188. for missingValue >= expectedInterval {
  189. if err := h.RecordValue(missingValue); err != nil {
  190. return err
  191. }
  192. missingValue -= expectedInterval
  193. }
  194. return nil
  195. }
  196. // RecordValues records n occurrences of the given value, returning an error if
  197. // the value is out of range.
  198. func (h *Histogram) RecordValues(v, n int64) error {
  199. idx := h.countsIndexFor(v)
  200. if idx < 0 || int(h.countsLen) <= idx {
  201. return fmt.Errorf("value %d is too large to be recorded", v)
  202. }
  203. h.counts[idx] += n
  204. h.totalCount += n
  205. return nil
  206. }
  207. // ValueAtQuantile returns the recorded value at the given quantile (0..100).
  208. func (h *Histogram) ValueAtQuantile(q float64) int64 {
  209. if q > 100 {
  210. q = 100
  211. }
  212. total := int64(0)
  213. countAtPercentile := int64(((q / 100) * float64(h.totalCount)) + 0.5)
  214. i := h.iterator()
  215. for i.next() {
  216. total += i.countAtIdx
  217. if total >= countAtPercentile {
  218. return h.highestEquivalentValue(i.valueFromIdx)
  219. }
  220. }
  221. return 0
  222. }
  223. // CumulativeDistribution returns an ordered list of brackets of the
  224. // distribution of recorded values.
  225. func (h *Histogram) CumulativeDistribution() []Bracket {
  226. var result []Bracket
  227. i := h.pIterator(1)
  228. for i.next() {
  229. result = append(result, Bracket{
  230. Quantile: i.percentile,
  231. Count: i.countToIdx,
  232. ValueAt: i.highestEquivalentValue,
  233. })
  234. }
  235. return result
  236. }
  237. // SignificantFigures returns the significant figures used to create the
  238. // histogram
  239. func (h *Histogram) SignificantFigures() int64 {
  240. return h.significantFigures
  241. }
  242. // LowestTrackableValue returns the lower bound on values that will be added
  243. // to the histogram
  244. func (h *Histogram) LowestTrackableValue() int64 {
  245. return h.lowestTrackableValue
  246. }
  247. // HighestTrackableValue returns the upper bound on values that will be added
  248. // to the histogram
  249. func (h *Histogram) HighestTrackableValue() int64 {
  250. return h.highestTrackableValue
  251. }
  252. // Histogram bar for plotting
  253. type Bar struct {
  254. From, To, Count int64
  255. }
  256. // Pretty print as csv for easy plotting
  257. func (b Bar) String() string {
  258. return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count)
  259. }
  260. // Distribution returns an ordered list of bars of the
  261. // distribution of recorded values, counts can be normalized to a probability
  262. func (h *Histogram) Distribution() (result []Bar) {
  263. i := h.iterator()
  264. for i.next() {
  265. result = append(result, Bar{
  266. Count: i.countAtIdx,
  267. From: h.lowestEquivalentValue(i.valueFromIdx),
  268. To: i.highestEquivalentValue,
  269. })
  270. }
  271. return result
  272. }
  273. // Equals returns true if the two Histograms are equivalent, false if not.
  274. func (h *Histogram) Equals(other *Histogram) bool {
  275. switch {
  276. case
  277. h.lowestTrackableValue != other.lowestTrackableValue,
  278. h.highestTrackableValue != other.highestTrackableValue,
  279. h.unitMagnitude != other.unitMagnitude,
  280. h.significantFigures != other.significantFigures,
  281. h.subBucketHalfCountMagnitude != other.subBucketHalfCountMagnitude,
  282. h.subBucketHalfCount != other.subBucketHalfCount,
  283. h.subBucketMask != other.subBucketMask,
  284. h.subBucketCount != other.subBucketCount,
  285. h.bucketCount != other.bucketCount,
  286. h.countsLen != other.countsLen,
  287. h.totalCount != other.totalCount:
  288. return false
  289. default:
  290. for i, c := range h.counts {
  291. if c != other.counts[i] {
  292. return false
  293. }
  294. }
  295. }
  296. return true
  297. }
  298. // Export returns a snapshot view of the Histogram. This can be later passed to
  299. // Import to construct a new Histogram with the same state.
  300. func (h *Histogram) Export() *Snapshot {
  301. return &Snapshot{
  302. LowestTrackableValue: h.lowestTrackableValue,
  303. HighestTrackableValue: h.highestTrackableValue,
  304. SignificantFigures: h.significantFigures,
  305. Counts: append([]int64(nil), h.counts...), // copy
  306. }
  307. }
  308. // Import returns a new Histogram populated from the Snapshot data (which the
  309. // caller must stop accessing).
  310. func Import(s *Snapshot) *Histogram {
  311. h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures))
  312. h.counts = s.Counts
  313. totalCount := int64(0)
  314. for i := int32(0); i < h.countsLen; i++ {
  315. countAtIndex := h.counts[i]
  316. if countAtIndex > 0 {
  317. totalCount += countAtIndex
  318. }
  319. }
  320. h.totalCount = totalCount
  321. return h
  322. }
  323. func (h *Histogram) iterator() *iterator {
  324. return &iterator{
  325. h: h,
  326. subBucketIdx: -1,
  327. }
  328. }
  329. func (h *Histogram) rIterator() *rIterator {
  330. return &rIterator{
  331. iterator: iterator{
  332. h: h,
  333. subBucketIdx: -1,
  334. },
  335. }
  336. }
  337. func (h *Histogram) pIterator(ticksPerHalfDistance int32) *pIterator {
  338. return &pIterator{
  339. iterator: iterator{
  340. h: h,
  341. subBucketIdx: -1,
  342. },
  343. ticksPerHalfDistance: ticksPerHalfDistance,
  344. }
  345. }
  346. func (h *Histogram) sizeOfEquivalentValueRange(v int64) int64 {
  347. bucketIdx := h.getBucketIndex(v)
  348. subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
  349. adjustedBucket := bucketIdx
  350. if subBucketIdx >= h.subBucketCount {
  351. adjustedBucket++
  352. }
  353. return int64(1) << uint(h.unitMagnitude+int64(adjustedBucket))
  354. }
  355. func (h *Histogram) valueFromIndex(bucketIdx, subBucketIdx int32) int64 {
  356. return int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude)
  357. }
  358. func (h *Histogram) lowestEquivalentValue(v int64) int64 {
  359. bucketIdx := h.getBucketIndex(v)
  360. subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
  361. return h.valueFromIndex(bucketIdx, subBucketIdx)
  362. }
  363. func (h *Histogram) nextNonEquivalentValue(v int64) int64 {
  364. return h.lowestEquivalentValue(v) + h.sizeOfEquivalentValueRange(v)
  365. }
  366. func (h *Histogram) highestEquivalentValue(v int64) int64 {
  367. return h.nextNonEquivalentValue(v) - 1
  368. }
  369. func (h *Histogram) medianEquivalentValue(v int64) int64 {
  370. return h.lowestEquivalentValue(v) + (h.sizeOfEquivalentValueRange(v) >> 1)
  371. }
  372. func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 {
  373. return h.counts[h.countsIndex(bucketIdx, subBucketIdx)]
  374. }
  375. func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
  376. bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude)
  377. offsetInBucket := subBucketIdx - h.subBucketHalfCount
  378. return bucketBaseIdx + offsetInBucket
  379. }
  380. func (h *Histogram) getBucketIndex(v int64) int32 {
  381. pow2Ceiling := bitLen(v | h.subBucketMask)
  382. return int32(pow2Ceiling - int64(h.unitMagnitude) -
  383. int64(h.subBucketHalfCountMagnitude+1))
  384. }
  385. func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 {
  386. return int32(v >> uint(int64(idx)+int64(h.unitMagnitude)))
  387. }
  388. func (h *Histogram) countsIndexFor(v int64) int {
  389. bucketIdx := h.getBucketIndex(v)
  390. subBucketIdx := h.getSubBucketIdx(v, bucketIdx)
  391. return int(h.countsIndex(bucketIdx, subBucketIdx))
  392. }
  393. type iterator struct {
  394. h *Histogram
  395. bucketIdx, subBucketIdx int32
  396. countAtIdx, countToIdx, valueFromIdx int64
  397. highestEquivalentValue int64
  398. }
  399. func (i *iterator) next() bool {
  400. if i.countToIdx >= i.h.totalCount {
  401. return false
  402. }
  403. // increment bucket
  404. i.subBucketIdx++
  405. if i.subBucketIdx >= i.h.subBucketCount {
  406. i.subBucketIdx = i.h.subBucketHalfCount
  407. i.bucketIdx++
  408. }
  409. if i.bucketIdx >= i.h.bucketCount {
  410. return false
  411. }
  412. i.countAtIdx = i.h.getCountAtIndex(i.bucketIdx, i.subBucketIdx)
  413. i.countToIdx += i.countAtIdx
  414. i.valueFromIdx = i.h.valueFromIndex(i.bucketIdx, i.subBucketIdx)
  415. i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx)
  416. return true
  417. }
  418. type rIterator struct {
  419. iterator
  420. countAddedThisStep int64
  421. }
  422. func (r *rIterator) next() bool {
  423. for r.iterator.next() {
  424. if r.countAtIdx != 0 {
  425. r.countAddedThisStep = r.countAtIdx
  426. return true
  427. }
  428. }
  429. return false
  430. }
  431. type pIterator struct {
  432. iterator
  433. seenLastValue bool
  434. ticksPerHalfDistance int32
  435. percentileToIteratorTo float64
  436. percentile float64
  437. }
  438. func (p *pIterator) next() bool {
  439. if !(p.countToIdx < p.h.totalCount) {
  440. if p.seenLastValue {
  441. return false
  442. }
  443. p.seenLastValue = true
  444. p.percentile = 100
  445. return true
  446. }
  447. if p.subBucketIdx == -1 && !p.iterator.next() {
  448. return false
  449. }
  450. var done = false
  451. for !done {
  452. currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
  453. if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
  454. p.percentile = p.percentileToIteratorTo
  455. halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
  456. percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
  457. p.percentileToIteratorTo += 100.0 / percentileReportingTicks
  458. return true
  459. }
  460. done = !p.iterator.next()
  461. }
  462. return true
  463. }
  464. func bitLen(x int64) (n int64) {
  465. for ; x >= 0x8000; x >>= 16 {
  466. n += 16
  467. }
  468. if x >= 0x80 {
  469. x >>= 8
  470. n += 8
  471. }
  472. if x >= 0x8 {
  473. x >>= 4
  474. n += 4
  475. }
  476. if x >= 0x2 {
  477. x >>= 2
  478. n += 2
  479. }
  480. if x >= 0x1 {
  481. n++
  482. }
  483. return
  484. }