Skip to content

Commit 799f9c5

Browse files
authored
Fix reset handling by checking against previous value instead of reset value. (#263)
1 parent 1361301 commit 799f9c5

File tree

2 files changed

+178
-5
lines changed

2 files changed

+178
-5
lines changed

retrieval/series_cache.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,21 @@ type seriesCacheEntry struct {
100100
suffix string
101101
hash uint64
102102

103-
hasReset bool
103+
// Whether the series has been reset/initialized yet. This is false only for
104+
// the first sample of a new series in the cache, which causes the initial
105+
// "reset". After that, it is always true.
106+
hasReset bool
107+
108+
// The value and timestamp of the latest reset. The timestamp is when it
109+
// occurred, and the value is what it was reset to. resetValue will initially
110+
// be the value of the first sample, and then 0 for every subsequent reset.
104111
resetValue float64
105112
resetTimestamp int64
113+
114+
// Value of the most recent point seen for the time series. If a new value is
115+
// less than the previous, then the series has reset.
116+
previousValue float64
117+
106118
// maxSegment indicates the maximum WAL segment index in which
107119
// the series was first logged.
108120
// By providing it as an upper bound, we can safely delete a series entry
@@ -295,19 +307,22 @@ func (c *seriesCache) getResetAdjusted(ref uint64, t int64, v float64) (int64, f
295307
if !hasReset {
296308
e.resetTimestamp = t
297309
e.resetValue = v
310+
e.previousValue = v
298311
// If we just initialized the reset timestamp, this sample should be skipped.
299312
// We don't know the window over which the current cumulative value was built up over.
300313
// The next sample for will be considered from this point onwards.
301314
return 0, 0, false
302315
}
303-
if v < e.resetValue {
316+
if v < e.previousValue {
317+
// If the value has dropped, there's been a reset.
304318
// If the series was reset, set the reset timestamp to be one millisecond
305319
// before the timestamp of the current sample.
306320
// We don't know the true reset time but this ensures the range is non-zero
307321
// while unlikely to conflict with any previous sample.
308322
e.resetValue = 0
309323
e.resetTimestamp = t - 1
310324
}
325+
e.previousValue = v
311326
return e.resetTimestamp, v - e.resetValue, true
312327
}
313328

retrieval/transform_test.go

Lines changed: 161 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ func TestSampleBuilder(t *testing.T) {
128128
{Ref: 2, T: 2000, V: 5.5},
129129
{Ref: 2, T: 3000, V: 8},
130130
{Ref: 2, T: 4000, V: 9},
131-
{Ref: 2, T: 5000, V: 3},
131+
{Ref: 2, T: 5000, V: 7},
132132
{Ref: 1, T: 1000, V: 200},
133133
{Ref: 3, T: 3000, V: 1},
134134
{Ref: 4, T: 4000, V: 2},
@@ -184,7 +184,7 @@ func TestSampleBuilder(t *testing.T) {
184184
},
185185
}},
186186
},
187-
{ // 3
187+
{ // 3: Reset series since sample's value is less than previous value.
188188
Resource: &monitoredres_pb.MonitoredResource{
189189
Type: "resource2",
190190
Labels: map[string]string{"resource_a": "resource2_a"},
@@ -201,7 +201,7 @@ func TestSampleBuilder(t *testing.T) {
201201
EndTime: &timestamp_pb.Timestamp{Seconds: 5},
202202
},
203203
Value: &monitoring_pb.TypedValue{
204-
Value: &monitoring_pb.TypedValue_DoubleValue{3},
204+
Value: &monitoring_pb.TypedValue_DoubleValue{7},
205205
},
206206
}},
207207
},
@@ -973,6 +973,164 @@ func TestSampleBuilder(t *testing.T) {
973973
},
974974
},
975975
},
976+
// Samples resulting in multiple resets for a single time series.
977+
{
978+
targets: targetMap{
979+
"job1/instance1": &targets.Target{
980+
Labels: promlabels.FromStrings("job", "job1", "instance", "instance1"),
981+
DiscoveredLabels: promlabels.FromStrings("__resource_a", "resource2_a"),
982+
},
983+
},
984+
series: seriesMap{
985+
1: labels.FromStrings("job", "job1", "instance", "instance1", "__name__", "metric1_count"),
986+
},
987+
metadata: metadataMap{
988+
"job1/instance1/metric1": &metadata.Entry{Metric: "metric1_count", MetricType: textparse.MetricTypeSummary, ValueType: metric_pb.MetricDescriptor_DOUBLE},
989+
},
990+
metricPrefix: "test.googleapis.com",
991+
input: []tsdb.RefSample{
992+
// The first result will always be nil due to reset timestamp handling.
993+
{Ref: 1, T: 2000, V: 5}, // reset since first value; use as baseline
994+
{Ref: 1, T: 3000, V: 8},
995+
{Ref: 1, T: 4000, V: 9},
996+
{Ref: 1, T: 5000, V: 8}, // reset since value dropped (8<9)
997+
{Ref: 1, T: 6000, V: 4}, // reset since value dropped (4<8)
998+
{Ref: 1, T: 7000, V: 12},
999+
{Ref: 1, T: 8000, V: 1}, // reset since value dropped (1<12)
1000+
},
1001+
result: []*monitoring_pb.TimeSeries{
1002+
nil, // first sample of new series is always nil; used as reset baseline
1003+
{
1004+
Resource: &monitoredres_pb.MonitoredResource{
1005+
Type: "resource2",
1006+
Labels: map[string]string{"resource_a": "resource2_a"},
1007+
},
1008+
Metric: &metric_pb.Metric{
1009+
Type: "test.googleapis.com/metric1_count",
1010+
Labels: map[string]string{},
1011+
},
1012+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1013+
ValueType: metric_pb.MetricDescriptor_INT64,
1014+
Points: []*monitoring_pb.Point{{
1015+
Interval: &monitoring_pb.TimeInterval{
1016+
StartTime: &timestamp_pb.Timestamp{Seconds: 2},
1017+
EndTime: &timestamp_pb.Timestamp{Seconds: 3},
1018+
},
1019+
Value: &monitoring_pb.TypedValue{
1020+
Value: &monitoring_pb.TypedValue_Int64Value{3},
1021+
},
1022+
}},
1023+
},
1024+
{
1025+
Resource: &monitoredres_pb.MonitoredResource{
1026+
Type: "resource2",
1027+
Labels: map[string]string{"resource_a": "resource2_a"},
1028+
},
1029+
Metric: &metric_pb.Metric{
1030+
Type: "test.googleapis.com/metric1_count",
1031+
Labels: map[string]string{},
1032+
},
1033+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1034+
ValueType: metric_pb.MetricDescriptor_INT64,
1035+
Points: []*monitoring_pb.Point{{
1036+
Interval: &monitoring_pb.TimeInterval{
1037+
StartTime: &timestamp_pb.Timestamp{Seconds: 2},
1038+
EndTime: &timestamp_pb.Timestamp{Seconds: 4},
1039+
},
1040+
Value: &monitoring_pb.TypedValue{
1041+
Value: &monitoring_pb.TypedValue_Int64Value{4},
1042+
},
1043+
}},
1044+
},
1045+
// reset since value dropped (8<9)
1046+
{
1047+
Resource: &monitoredres_pb.MonitoredResource{
1048+
Type: "resource2",
1049+
Labels: map[string]string{"resource_a": "resource2_a"},
1050+
},
1051+
Metric: &metric_pb.Metric{
1052+
Type: "test.googleapis.com/metric1_count",
1053+
Labels: map[string]string{},
1054+
},
1055+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1056+
ValueType: metric_pb.MetricDescriptor_INT64,
1057+
Points: []*monitoring_pb.Point{{
1058+
Interval: &monitoring_pb.TimeInterval{
1059+
StartTime: &timestamp_pb.Timestamp{Seconds: 4, Nanos: 1e9 - 1e6},
1060+
EndTime: &timestamp_pb.Timestamp{Seconds: 5},
1061+
},
1062+
Value: &monitoring_pb.TypedValue{
1063+
Value: &monitoring_pb.TypedValue_Int64Value{8},
1064+
},
1065+
}},
1066+
},
1067+
// reset since value dropped (4<8)
1068+
{
1069+
Resource: &monitoredres_pb.MonitoredResource{
1070+
Type: "resource2",
1071+
Labels: map[string]string{"resource_a": "resource2_a"},
1072+
},
1073+
Metric: &metric_pb.Metric{
1074+
Type: "test.googleapis.com/metric1_count",
1075+
Labels: map[string]string{},
1076+
},
1077+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1078+
ValueType: metric_pb.MetricDescriptor_INT64,
1079+
Points: []*monitoring_pb.Point{{
1080+
Interval: &monitoring_pb.TimeInterval{
1081+
StartTime: &timestamp_pb.Timestamp{Seconds: 5, Nanos: 1e9 - 1e6},
1082+
EndTime: &timestamp_pb.Timestamp{Seconds: 6},
1083+
},
1084+
Value: &monitoring_pb.TypedValue{
1085+
Value: &monitoring_pb.TypedValue_Int64Value{4},
1086+
},
1087+
}},
1088+
},
1089+
{
1090+
Resource: &monitoredres_pb.MonitoredResource{
1091+
Type: "resource2",
1092+
Labels: map[string]string{"resource_a": "resource2_a"},
1093+
},
1094+
Metric: &metric_pb.Metric{
1095+
Type: "test.googleapis.com/metric1_count",
1096+
Labels: map[string]string{},
1097+
},
1098+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1099+
ValueType: metric_pb.MetricDescriptor_INT64,
1100+
Points: []*monitoring_pb.Point{{
1101+
Interval: &monitoring_pb.TimeInterval{
1102+
StartTime: &timestamp_pb.Timestamp{Seconds: 5, Nanos: 1e9 - 1e6},
1103+
EndTime: &timestamp_pb.Timestamp{Seconds: 7},
1104+
},
1105+
Value: &monitoring_pb.TypedValue{
1106+
Value: &monitoring_pb.TypedValue_Int64Value{12},
1107+
},
1108+
}},
1109+
},
1110+
// reset since value dropped (1<12)
1111+
{
1112+
Resource: &monitoredres_pb.MonitoredResource{
1113+
Type: "resource2",
1114+
Labels: map[string]string{"resource_a": "resource2_a"},
1115+
},
1116+
Metric: &metric_pb.Metric{
1117+
Type: "test.googleapis.com/metric1_count",
1118+
Labels: map[string]string{},
1119+
},
1120+
MetricKind: metric_pb.MetricDescriptor_CUMULATIVE,
1121+
ValueType: metric_pb.MetricDescriptor_INT64,
1122+
Points: []*monitoring_pb.Point{{
1123+
Interval: &monitoring_pb.TimeInterval{
1124+
StartTime: &timestamp_pb.Timestamp{Seconds: 7, Nanos: 1e9 - 1e6},
1125+
EndTime: &timestamp_pb.Timestamp{Seconds: 8},
1126+
},
1127+
Value: &monitoring_pb.TypedValue{
1128+
Value: &monitoring_pb.TypedValue_Int64Value{1},
1129+
},
1130+
}},
1131+
},
1132+
},
1133+
},
9761134
}
9771135
ctx, cancel := context.WithCancel(context.Background())
9781136
defer cancel()

0 commit comments

Comments
 (0)