Skip to content

Commit

Permalink
Logging: Add HTTP API to change the log level at runtime (#9357)
Browse files Browse the repository at this point in the history
**What this PR does / why we need it**:
To be able to change the log level at runtime, particularly from info to
debug to enable debugging information during an incident without
restarting Loki.

**Which issue(s) this PR fixes**:
Fixes #6805

**Special notes for your reviewer**:

**Checklist**
- [x] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [x] Documentation added
- [x] Tests updated
- [x] `CHANGELOG.md` updated
- [x] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/upgrading/_index.md`: No such changes

---------

Co-authored-by: J Stickler <julie.stickler@grafana.com>
  • Loading branch information
Indransh and JStickler authored May 25, 2023
1 parent b4d0a2c commit a554047
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* [8732](https://github.com/grafana/loki/pull/8732) **abaguas**: azure: respect retry config before cancelling the context
* [9206](https://github.com/grafana/loki/pull/9206) **dannykopping**: Ruler: log rule evaluation detail.
* [9184](https://github.com/grafana/loki/pull/9184) **periklis**: Bump dskit to introduce IPv6 support for memberlist
* [9357](https://github.com/grafana/loki/pull/9357) **Indransh**: Add HTTP API to change the log level at runtime
* [9431](https://github.com/grafana/loki/pull/9431) **dannykopping**: Add more buckets to `loki_memcache_request_duration_seconds` metric; latencies can increase if using memcached with NVMe

##### Fixes
Expand Down
16 changes: 16 additions & 0 deletions docs/sources/reference/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ component is different.
These endpoints are exposed by all components:

- [`GET /ready`](#identify-ready-loki-instance)
- [`GET /log_level`](#change-log-level-at-runtime)
- [`GET /metrics`](#return-exposed-prometheus-metrics)
- [`GET /config`](#list-current-configuration)
- [`GET /services`](#list-running-services)
Expand Down Expand Up @@ -612,6 +613,21 @@ running Loki on Kubernetes, `/ready` can be used as a readiness probe.

In microservices mode, the `/ready` endpoint is exposed by all components.

## Change log level at runtime

```
GET /log_level
POST /log_level
```

For `/log_level`, a `GET` returns the current log level, and a `POST` lets you change the log level of a Loki process at runtime. This can be useful for accessing debugging information during an incident. Use caution when running at the `debug` log level, as this produces a large volume of data.

Params:

- `log_level`: A valid log level that can be passed as a URL param (`?log_level=<level>`) or as a form value in case of `POST`. Valid levels: [debug, info, warn, error]

In microservices mode, the `/log_level` endpoint is exposed by all components.

## Flush in-memory chunks to backing store

```
Expand Down
2 changes: 2 additions & 0 deletions pkg/loki/loki.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@ func (t *Loki) Run(opts RunOpts) error {
}
t.Server.HTTP.Path("/ready").Methods("GET").Handler(t.readyHandler(sm, shutdownRequested))

t.Server.HTTP.Path("/log_level").Methods("GET", "POST").Handler(util_log.LevelHandler(&t.Cfg.Server.LogLevel))

grpc_health_v1.RegisterHealthServer(t.Server.GRPC, grpcutil.NewHealthCheck(sm))

// Config endpoint adds a way to see the config and the changes compared to the defaults.
Expand Down
67 changes: 63 additions & 4 deletions pkg/util/log/log.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package log

import (
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"os"
"time"

Expand All @@ -22,6 +24,8 @@ var (
Logger = log.NewNopLogger()

bufferedLogger *LineBufferedLogger

plogger *prometheusLogger
)

// InitLogger initialises the global gokit logger (util_log.Logger) and overrides the
Expand Down Expand Up @@ -53,6 +57,7 @@ func Flush() error {

// prometheusLogger exposes Prometheus counters for each of go-kit's log levels.
type prometheusLogger struct {
baseLogger log.Logger
logger log.Logger
logMessages *prometheus.CounterVec
internalLogMessages *prometheus.CounterVec
Expand All @@ -62,6 +67,54 @@ type prometheusLogger struct {
useSyncLogger bool
}

// LevelHandler returns an HTTP handler that reports or changes the log level.
//
// GET responds with the level currently held in currentLogLevel. POST reads
// the 'log_level' value (URL query parameter or form field), stores it in
// currentLogLevel and re-filters the package-level logger to match.
//
// Every response is a JSON object with a "message" and, except for GET, a
// "status" of "success" or "failed". Status codes: 200 on success, 400 when
// the requested level is rejected, 405 for methods other than GET and POST,
// and 500 when a POST arrives before the package-level logger exists.
func LevelHandler(currentLogLevel *logging.Level) http.HandlerFunc {
	type logResponse struct {
		Status  string `json:"status,omitempty"`
		Message string `json:"message"`
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var resp logResponse
		status := http.StatusOK

		w.Header().Set("Content-Type", "application/json; charset=utf-8")

		switch r.Method {
		case http.MethodGet:
			resp = logResponse{
				Message: fmt.Sprintf("Current log level is %s", currentLogLevel.String()),
			}
		case http.MethodPost:
			logLevel := r.FormValue("log_level")

			if plogger == nil {
				// Checked before touching the config so the stored level and the
				// active filter cannot drift apart, and so the handler does not
				// panic on a nil logger.
				status = http.StatusInternalServerError
				resp = logResponse{
					Message: "logger is not initialised",
					Status:  "failed",
				}
			} else if err := currentLogLevel.Set(logLevel); err != nil {
				// Set validates the level; the config is left as it was.
				status = http.StatusBadRequest
				resp = logResponse{
					Message: err.Error(),
					Status:  "failed",
				}
			} else {
				plogger.Set(levelFilter(logLevel))

				msg := fmt.Sprintf("Log level set to %s", logLevel)
				level.Info(Logger).Log("msg", msg)
				resp = logResponse{
					Status:  "success",
					Message: msg,
				}
			}
		default:
			// The router may already restrict methods, but the handler should
			// not answer 200 with an empty body if it is mounted elsewhere.
			w.Header().Set("Allow", "GET, POST")
			status = http.StatusMethodNotAllowed
			resp = logResponse{
				Message: fmt.Sprintf("method %s is not allowed", r.Method),
				Status:  "failed",
			}
		}

		w.WriteHeader(status)
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			level.Error(Logger).Log("msg", "failed to write log level response", "err", err)
		}
	}
}

// newPrometheusLogger creates a new instance of PrometheusLogger which exposes
// Prometheus counters for various log levels.
func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.Registerer, buffered bool, sync bool) log.Logger {
Expand Down Expand Up @@ -111,13 +164,14 @@ func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.
writer = log.NewSyncWriter(writer)
}

logger := log.NewLogfmtLogger(writer)
baseLogger := log.NewLogfmtLogger(writer)
if format.String() == "json" {
logger = log.NewJSONLogger(writer)
baseLogger = log.NewJSONLogger(writer)
}
logger = level.NewFilter(logger, levelFilter(l.String()))
logger := level.NewFilter(baseLogger, levelFilter(l.String()))

plogger := &prometheusLogger{
plogger = &prometheusLogger{
baseLogger: baseLogger,
logger: logger,
logMessages: logMessages,
internalLogMessages: internalLogMessages,
Expand All @@ -139,6 +193,11 @@ func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.
return log.With(plogger, "ts", log.DefaultTimestampUTC)
}

// Set replaces the active level filter: the unfiltered base logger is wrapped
// in a fresh filter built from option, and that wrapper becomes the logger
// that subsequent Log calls write through.
//
// NOTE(review): no lock or atomic guards this assignment in the code visible
// here, while Log reads the same field — confirm whether concurrent use needs
// synchronisation.
func (pl *prometheusLogger) Set(option level.Option) {
	filtered := level.NewFilter(pl.baseLogger, option)
	pl.logger = filtered
}

// Log increments the appropriate Prometheus counter depending on the log level.
func (pl *prometheusLogger) Log(kv ...interface{}) error {
pl.logger.Log(kv...)
Expand Down
64 changes: 64 additions & 0 deletions pkg/util/log/log_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package log

import (
	"io"
	"net/http"
	"net/http/httptest"
	"net/url"
	"strings"
	"testing"

	"github.com/go-kit/log"
	"github.com/stretchr/testify/assert"
	"github.com/weaveworks/common/logging"
)

// TestLevelHandler drives LevelHandler over real HTTP requests and checks the
// JSON body, the status code and the level left in the shared logging.Level
// after each request. Cases run in order against the same level value, so the
// final POST is the only one expected to move it away from "info".
func TestLevelHandler(t *testing.T) {
	var lvl logging.Level
	err := lvl.Set("info")
	assert.NoError(t, err)

	// The handler re-filters the package-level plogger; install a throwaway
	// one that discards output and put the previous value back afterwards.
	previous := plogger
	plogger = &prometheusLogger{
		baseLogger: log.NewLogfmtLogger(io.Discard),
	}
	t.Cleanup(func() { plogger = previous })

	// httptest picks a free port, is listening before NewServer returns, and
	// is shut down with the test — unlike a bare ListenAndServe goroutine.
	srv := httptest.NewServer(LevelHandler(&lvl))
	t.Cleanup(srv.Close)

	testCases := []struct {
		testName           string
		targetLogLevel     string
		expectedResponse   string
		expectedLogLevel   string
		expectedStatusCode int
	}{
		{"GetLogLevel", "", `{"message":"Current log level is info"}`, "info", 200},
		{"PostLogLevelInvalid", "invalid", `{"message":"unrecognized log level \"invalid\"", "status":"failed"}`, "info", 400},
		{"PostLogLevelEmpty", "", `{"message":"unrecognized log level \"\"", "status":"failed"}`, "info", 400},
		{"PostLogLevelDebug", "debug", `{"status": "success", "message":"Log level set to debug"}`, "debug", 200},
	}

	for _, testCase := range testCases {
		t.Run(testCase.testName, func(t *testing.T) {
			var (
				resp *http.Response
				err  error
			)

			// The HTTP method is encoded in the test name prefix.
			switch {
			case strings.HasPrefix(testCase.testName, "Get"):
				resp, err = http.Get(srv.URL + "/")
			case strings.HasPrefix(testCase.testName, "Post"):
				resp, err = http.PostForm(srv.URL+"/", url.Values{"log_level": {testCase.targetLogLevel}})
			default:
				t.Fatalf("test case name %q must start with Get or Post", testCase.testName)
			}
			// Stop here on failure: resp is nil and must not be dereferenced.
			if !assert.NoError(t, err) {
				return
			}
			defer resp.Body.Close()

			body, err := io.ReadAll(resp.Body)
			assert.NoError(t, err)
			assert.JSONEq(t, testCase.expectedResponse, string(body))
			assert.Equal(t, testCase.expectedStatusCode, resp.StatusCode)
			assert.Equal(t, testCase.expectedLogLevel, lvl.String())
		})
	}
}

0 comments on commit a554047

Please sign in to comment.