-
Notifications
You must be signed in to change notification settings - Fork 89
/
Copy pathconfig.alloy
459 lines (406 loc) · 19.8 KB
/
config.alloy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
///////////////////////////////////////////////////////////////////////////////
// Configuration file
local.file "endpoints" {
// The endpoints file is used to define the endpoints, credentials and options
// for the Alloy export to.
filename = "/etc/alloy/endpoints.json"
}
///////////////////////////////////////////////////////////////////////////////
// Metrics scraping
// Scrape Tempo, Mimir, Phlare and Loki
// We use the prometheus.scrape component and give this a unique label.
prometheus.scrape "mltpg_infra" {
// The targets array allows us to specify which service targets to scrape from.
// Define the address to scrape from, and add a 'group' and 'service' label for each target.
targets = [
{"__address__" = "mimir:9009", group = "infrastructure", service = "mimir"},
{"__address__" = "tempo:3200", group = "infrastructure", service = "tempo"},
{"__address__" = "loki:3100", group = "infrastructure", service = "loki"},
{"__address__" = "pyroscope:4040", group = "infrastructure", service = "pyroscope"},
{"__address__" = "grafana:3000", group = "infrastructure", service = "grafana"},
]
// Scrape all of these services every 15 seconds.
scrape_interval = "15s"
// Send the metrics to the prometheus remote write receiver for exporting to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
// The job name to add to the scraped metrics.
job_name = "mltpg_infra"
}
// This component scrapes the Mythical application, defining unique prometheus labels.
prometheus.scrape "mythical" {
// Scrape from the mythical requester and server services, and add them to the 'mythical' group with their service
// names.
targets = [
{"__address__" = "mythical-server:4000", group = "mythical", service = "mythical-server"},
{"__address__" = "mythical-requester:4001", group = "mythical", service = "mythical-requester"},
]
// We need a scrape interval and timeout of 2s as we want reactive metric data.
scrape_interval = "2s"
scrape_timeout = "2s"
// Send the metrics to the prometheus remote write receiver for exporting to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
// Attach the job name to the metrics.
job_name = "mythical"
}
// Scrape the Beyla services, to expose the metrics generated by them for the Mythical services.
prometheus.scrape "beyla_infra" {
// The targets array allows us to specify which service targets to scrape from.
// Define the address to scrape from, and add a 'group' and 'service' label for each target.
targets = [
{"__address__" = "beyla-requester:9090", group = "beyla", service = "beyla-requester"},
{"__address__" = "beyla-server:9090", group = "beyla", service = "beyla-server"},
{"__address__" = "beyla-recorder:9090", group = "beyla", service = "beyla-recorder"},
]
// Scrape all of these services every 15 seconds.
scrape_interval = "15s"
// Send the metrics to the prometheus remote write receiver for exporting to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
// The job name to add to the scraped metrics.
job_name = "beyla_infra"
}
// Scrape the local Alloy itself.
prometheus.scrape "alloy" {
// Only one target, the Alloy, it's part of the 'infrastructure' group.
targets = [{"__address__" = "localhost:12345", group = "infrastructure", service = "alloy"}]
// Send the metrics to the prometheus remote write receiver for exporting to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
// Attach job name to the metrics.
job_name = "alloy"
}
// The Alloy exports everything, using an empty block.
prometheus.exporter.unix "default" {
}
// This component scrapes the Unix exporter metrics generated above.
prometheus.scrape "unix" {
// Use the Unix prometheus exporter as the target.
targets = prometheus.exporter.unix.default.targets
// Send the metrics to the prometheus remote write receiver for exporting to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
// Attach job name to the metrics.
job_name = "node_exporter"
}
// The prometheus.remote_write component defines an endpoint for remotely writing metrics to.
// In this case, our locally running Mimir service.
prometheus.remote_write "mimir" {
// The endpoint is the Mimir service.
endpoint {
url = json_path(local.file.endpoints.content, ".metrics.url")[0]
// Basic auth credentials. If the endpoint is not TLS, whilst sent, these will be ignored.
basic_auth {
username = json_path(local.file.endpoints.content, ".metrics.basicAuth.username")[0]
password = json_path(local.file.endpoints.content, ".metrics.basicAuth.password")[0]
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Logging
// The Loki receiver is used to ingest logs from the mythical application via Loki's HTTP REST API.
loki.source.api "mythical" {
// Listen for Loki data on port 3100.
http {
listen_address = "0.0.0.0"
listen_port = "3100"
}
// Forward all received data to the Loki processor component.
forward_to = [loki.process.mythical.receiver]
}
// The Loki processor allows us to accept a correctly formatted Loki log and to run a series of pipeline stages on it.
// This particular example shows how to parse timestamp data within a logline and use it as the timestamp for the logline.
// It essentially does nothing if the `TIMESHIFT` variable for the `mythical-requester` service is not set to `true` in
// the relevant Docker Compose manifest.
loki.process "mythical" {
// There are other stages that could easily extract the value from the logline (such as the logfmt stage), but this
// showcases a more complex manual regexp to extract the value into the map.
stage.regex {
expression=`^.*?loggedtime=(?P<loggedtime>\S+)`
}
// Use the timestamp stage to take the extracted value, now in the map, and use it as the timestamp for the logline.
// By doing so, you can ensure that logs that have reached Alloy at a later time than originally emitted are
// corrected to use the correct time, instead of the time they were received by Alloy.
// This stage shows an example of a user-defined timestamp format (note that the specific time the format is
// declared in is important for Alloy to understand the format correctly). We could also have used the RFC3339
// identifier in this case.
stage.timestamp {
source = "loggedtime"
format = "2006-01-02T15:04:05.000Z07:00"
}
// Forward to the Loki writer for output.
forward_to = [loki.write.mythical.receiver]
}
loki.write "mythical" {
// Output the Loki log to the local Loki instance.
endpoint {
url = json_path(local.file.endpoints.content, ".logs.url")[0]
// The basic auth credentials for the Loki instance.
basic_auth {
username = json_path(local.file.endpoints.content, ".logs.basicAuth.username")[0]
password = json_path(local.file.endpoints.content, ".logs.basicAuth.password")[0]
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Tracing
// The OpenTelemetry receiver is used to ingest all incoming trace spans. A label 'otlp_receiver' is added to uniquely
// identify this instance.
// Note that both the instrumented application *and* Beyla use the same receiver to send traces.
otelcol.receiver.otlp "otlp_receiver" {
// We don't technically need this, but it shows how to change listen address and incoming port.
// In this case, the Alloy is listening on all available bindable addresses on port 4317 (which is the
// default OTLP gRPC port) for the OTLP protocol.
grpc {
endpoint = "0.0.0.0:4317"
}
// We define where to send the output of all ingested traces. In this case, to the OpenTelemetry batch processor
// named 'default'.
output {
traces = [
// Uncomment the next line to generate service graph metrics from the Alloy. By default this is generated
// by the Tempo component, so be sure to remove the relevant configuration in the `tempo/tempo.yaml` file.
//otelcol.connector.servicegraph.tracemetrics.input,
// Uncomment the next line to generate span metrics from the Alloy. By default this is generated
// by the Tempo component, so be sure to remove the relevant configuration in the `tempo/tempo.yaml` file.
//otelcol.connector.spanmetrics.tracemetrics.input,
// The following would be used for tail sampling only traces containing errors.
// Uncomment the following line, then comment out the line below it (the batch processor) to use
// tail sampling.
//otelcol.processor.tail_sampling.errors.input,
otelcol.processor.batch.default.input,
otelcol.connector.spanlogs.autologging.input,
]
}
}
// The OpenTelemetry batch processor collects trace spans until a batch size or timeout is met, before sending those
// spans onto another target. This processor is labeled 'default'.
otelcol.processor.batch "default" {
// Wait until we've received 1000 samples, up to a maximum of 2000.
send_batch_size = 1000
send_batch_max_size = 2000
// Or until 2 seconds have elapsed.
timeout = "2s"
// When the Alloy has enough batched data, send it to the OpenTelemetry exporter named 'tempo'.
output {
traces = [otelcol.exporter.otlp.tempo.input]
}
}
// The OpenTelemetry exporter exports processed trace spans to another target that is listening for OTLP format traces.
// A unique label, 'tempo', is added to uniquely identify this exporter.
otelcol.exporter.otlp "tempo" {
// Define the client for exporting.
client {
// Authentication block.
auth = otelcol.auth.headers.tempo.handler
// Send to the locally running Tempo instance, on port 4317 (OTLP gRPC).
endpoint = json_path(local.file.endpoints.content, ".traces.url")[0]
// Configure TLS settings for communicating with the endpoint.
tls {
// The connection is insecure.
insecure = json_path(local.file.endpoints.content, ".traces.tls.insecure")[0]
// Do not verify TLS certificates when connecting.
insecure_skip_verify = json_path(local.file.endpoints.content, ".traces.tls.insecureSkipVerify")[0]
}
}
}
// The OpenTelemetry auth headers component is used to define the headers for the OTLP exporter. Note we don't
// use basic auth here because the OTel spec. demands TLS enabled for basic auth. Using basic header auth
// allow us to still wire up the basic auth credentials to the Tempo exporter even when they won't be required.
otelcol.auth.headers "tempo" {
header {
key = "Authorization"
value = join(["Basic ", json_path(local.file.endpoints.content, ".traces.basicAuthToken")[0]], "")
}
}
// The OpenTelemetry spanlog connector processes incoming trace spans and extracts data from them ready
// for logging. This is the equivalent of Grafana Alloy's static automatic_logging pipeline.
otelcol.connector.spanlogs "autologging" {
// We only want to output a line for each root span (ie. every single trace), and not for every
// process or span (outputting a line for every span would be extremely verbose).
spans = false
roots = true
processes = false
// We want to ensure that the following three span attributes are included in the log line, if
// present.
span_attributes = [ "http.method", "http.target", "http.status_code" ]
// Overrides the default key in the log line to be `traceId`, which is then used by Grafana to
// identify the trace ID for correlation with the Tempo datasource.
overrides {
trace_id_key = "traceId"
}
// Send to the OpenTelemetry Loki exporter.
output {
logs = [otelcol.exporter.loki.autologging.input]
}
}
// Simply forwards the incoming OpenTelemetry log format out as a Loki log.
// We need this stage to ensure we can then process the logline as a Loki object.
otelcol.exporter.loki "autologging" {
forward_to = [loki.process.autologging.receiver]
}
// The Loki processor allows us to accept a correctly formatted Loki log and mutate it into
// a set of fields for output.
loki.process "autologging" {
// The JSON stage simply extracts the `body` (the actual logline) from the Loki log, ignoring
// all other fields.
stage.json {
expressions = { "body" = "" }
}
// The output stage takes the body (the main logline) and uses this as the source for the output
// logline. In this case, it essentially turns it into logfmt.
stage.output {
source = "body"
}
// Finally send the processed logline onto the Loki exporter.
forward_to = [loki.write.autologging.receiver]
}
// The Loki writer receives a processed Loki log and then writes it to a Loki instance.
loki.write "autologging" {
// Add the `alloy` value to the `job` label, so we can identify it as having been generated
// by Grafana Alloy when querying.
external_labels = {
job = "alloy",
}
// Output the Loki log to the local Loki instance.
endpoint {
url = json_path(local.file.endpoints.content, ".logs.url")[0]
// The basic auth credentials for the Loki instance.
basic_auth {
username = json_path(local.file.endpoints.content, ".logs.basicAuth.username")[0]
password = json_path(local.file.endpoints.content, ".logs.basicAuth.password")[0]
}
}
}
// The Tail Sampling processor will use a set of policies to determine which received traces to keep
// and send to Tempo.
otelcol.processor.tail_sampling "errors" {
// Total wait time from the start of a trace before making a sampling decision. Note that smaller time
// periods can potentially cause a decision to be made before the end of a trace has occurred.
decision_wait = "30s"
// The following policies follow a logical OR pattern, meaning that if any of the policies match,
// the trace will be kept. For logical AND, you can use the `and` policy. Every span of a trace is
// examined by each policy in turn. A match will cause a short-circuit.
// This policy defines that traces that contain errors should be kept.
policy {
// The name of the policy can be used for logging purposes.
name = "sample-erroring-traces"
// The type must match the type of policy to be used, in this case examining the status code
// of every span in the trace.
type = "status_code"
// This block determines the error codes that should match in order to keep the trace,
// in this case the OpenTelemetry 'ERROR' code.
status_code {
status_codes = [ "ERROR" ]
}
}
// This policy defines that only traces that are longer than 200ms in total should be kept.
policy {
// The name of the policy can be used for logging purposes.
name = "sample-long-traces"
// The type must match the policy to be used, in this case the total latency of the trace.
type = "latency"
// This block determines the total length of the trace in milliseconds.
latency {
threshold_ms = 200
}
}
// The output block forwards the kept traces onto the batch processor, which will marshall them
// for exporting to Tempo.
output {
traces = [otelcol.processor.batch.default.input]
}
}
// The Spanmetrics Connector will generate RED metrics based on the incoming trace span data.
otelcol.connector.spanmetrics "tracemetrics" {
// The namespace explicit adds a prefix to all the generated span metrics names.
// In this case, we'll ensure they match as closely as possible those generated by Tempo.
namespace = "traces.spanmetrics"
// Each extra dimension (metrics label) to be added to the generated metrics from matching span attributes. These
// need to be defined with a name and optionally a default value (in the following cases, we do not want a default
// value if the span attribute is not present).
dimension {
name = "http.method"
}
dimension {
name = "http.target"
}
dimension {
name = "http.status_code"
}
dimension {
name = "service.version"
}
// A histogram block must be present, either explicitly defining bucket values or via an exponential block.
// We do the latter here.
histogram {
explicit {
}
}
// The exemplar block is added to ensure we generate exemplars for traces on relevant metric values.
exemplars {
enabled = true
}
// Generated metrics data is in OTLP format. We send this data to the OpenTelemetry Prometheus exporter to ensure
// it gets transformed into Prometheus format data.
output {
metrics = [otelcol.exporter.prometheus.tracemetrics.input]
}
}
// The Servicegraph Connector will generate service graph metrics (edges and nodes) based on incoming trace spans.
otelcol.connector.servicegraph "tracemetrics" {
// Extra dimensions (metrics labels) to be added to the generated metrics from matching span attributes.
// For this component, this is defined as an array. There are no default values and the labels will not be generated
// for missing span attributes.
dimensions = [
"http.method",
"http.target",
"http.status_code",
"service.version",
]
// Generated metrics data is in OTLP format. We send this data to the OpenTelemetry Prometheus exporter to ensure
// it gets transformed into Prometheus format data.
output {
metrics = [otelcol.exporter.prometheus.tracemetrics.input]
}
}
// The OpenTelemetry Prometheus exporter will transform incoming OTLP metrics data into Prometheus format data.
otelcol.exporter.prometheus "tracemetrics" {
// Forward to our local Prometheus remote writer which will send the metrics to Mimir.
forward_to = [prometheus.remote_write.mimir.receiver]
}
///////////////////////////////////////////////////////////////////////////////
// Profiling
// Scrape the Mythical application services for profiling data.
pyroscope.scrape "mythical" {
// Denotes the targets to be scraped, in this case the mythical server, requester and recorder.
targets = [
{"__address__" = "mythical-server:4000", group = "mythical", service_name = "mythical-server"},
{"__address__" = "mythical-requester:4001", group = "mythical", service_name = "mythical-requester"},
{"__address__" = "mythical-recorder:4002", group = "mythical", service_name = "mythical-recorder"},
]
// The profiling configuration block determines the profiling information to be retrieved. For the
// NodeJS application, we're looking for both CPU and memory data.
profiling_config {
profile.process_cpu {
enabled = true
path = "/debug/pprof/profile"
delta = true
}
profile.memory {
enabled = true
path = "/debug/pprof/heap"
delta = false
}
}
// Forward all scraped data to the Pyroscope exporter.
forward_to = [pyroscope.write.mythical.receiver]
}
// The Pyroscope exporter writes data with any additional information to the local Pyroscope instance.
pyroscope.write "mythical" {
// The endpoint is the listening Pyroscope instance.
endpoint {
url = json_path(local.file.endpoints.content, ".profiles.url")[0]
// The basic auth credentials for the Pyroscope instance.
basic_auth {
username = json_path(local.file.endpoints.content, ".profiles.basicAuth.username")[0]
password = json_path(local.file.endpoints.content, ".profiles.basicAuth.password")[0]
}
}
}