-
Notifications
You must be signed in to change notification settings - Fork 27
/
ddsketch.proto
66 lines (55 loc) · 3.03 KB
/
ddsketch.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2021 Datadog, Inc.
*/
syntax = "proto3";
option go_package = "github.com/DataDog/sketches-go/ddsketch/pb/sketchpb";
// A DDSketch is essentially a histogram that partitions the range of positive values into an infinite number of
// indexed bins whose size grows exponentially. It keeps track of the number of values (or possibly floating-point
// weights) added to each bin. Negative values are partitioned like positive values, symmetrically to zero.
// The value zero as well as its close neighborhood that would be mapped to extreme bin indexes is mapped to a specific
// counter.
message DDSketch {
// The mapping between positive values and the bin indexes they belong to.
IndexMapping mapping = 1;
// The store for keeping track of positive values.
Store positiveValues = 2;
// The store for keeping track of negative values. A negative value v is mapped using its positive opposite -v.
Store negativeValues = 3;
// The count for the value zero and its close neighborhood (whose width depends on the mapping).
double zeroCount = 4;
}
// How to map positive values to the bins they belong to.
message IndexMapping {
// The gamma parameter of the mapping, such that bin index that a value v belongs to is roughly equal to
// log(v)/log(gamma).
double gamma = 1;
// An offset that can be used to shift all bin indexes.
double indexOffset = 2;
// To speed up the computation of the index a value belongs to, the computation of the log may be approximated using
// the fact that the log to the base 2 of powers of 2 can be computed at a low cost from the binary representation of
// the input value. Other values can be approximated by interpolating between successive powers of 2 (linearly,
// quadratically or cubically).
// NONE means that the log is to be computed exactly (no interpolation).
Interpolation interpolation = 3;
enum Interpolation {
NONE = 0;
LINEAR = 1;
QUADRATIC = 2;
CUBIC = 3;
}
}
// A Store maps bin indexes to their respective counts.
// Counts can be encoded sparsely using binCounts, but also in a contiguous way using contiguousBinCounts and
// contiguousBinIndexOffset. Given that non-empty bins are in practice usually contiguous or close to one another, the
// latter contiguous encoding method is usually more efficient than the sparse one.
// Both encoding methods can be used conjointly. If a bin appears in both the sparse and the contiguous encodings, its
// count value is the sum of the counts in each encodings.
message Store {
// The bin counts, encoded sparsely.
map<sint32, double> binCounts = 1;
// The bin counts, encoded contiguously. The values of contiguousBinCounts are the counts for the bins of indexes
// o, o+1, o+2, etc., where o is contiguousBinIndexOffset.
repeated double contiguousBinCounts = 2 [packed = true];
sint32 contiguousBinIndexOffset = 3;
}