From 1dca242ba4a709f17b2567c0d0733e039bafef52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Mon, 27 Apr 2020 23:57:12 +0100
Subject: [PATCH 01/13] wip metrics.

---
 go.mod                           |   3 +
 go.sum                           |  48 ++++
 runtime/env.go                   |  19 ++
 runtime/{output.go => events.go} |  93 ++-----
 runtime/files.go                 | 103 ++++----
 runtime/influxdb.go              |  38 +++
 runtime/logger.go                |   6 -
 runtime/metrics.go               | 266 ++++++++++----------
 runtime/metrics_types.go         | 134 ++++++++++
 runtime/runenv.go                | 415 ++++++++-----------------------
 runtime/runner.go                |   7 +-
 runtime/runparams.go             | 289 +++++++++++++++++++++
 runtime/sinks.go                 |  37 +++
 13 files changed, 873 insertions(+), 585 deletions(-)
 create mode 100644 runtime/env.go
 rename runtime/{output.go => events.go} (54%)
 create mode 100644 runtime/influxdb.go
 create mode 100644 runtime/metrics_types.go
 create mode 100644 runtime/runparams.go
 create mode 100644 runtime/sinks.go

diff --git a/go.mod b/go.mod
index e33b271..dc987f5 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,10 @@ go 1.14
 require (
 	github.com/dustin/go-humanize v1.0.0
 	github.com/go-redis/redis/v7 v7.2.0
+	github.com/hashicorp/go-multierror v1.1.0
+	github.com/influxdata/influxdb-client-go v1.1.0
 	github.com/prometheus/client_golang v1.5.1
+	github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
 	go.uber.org/zap v1.14.1
 	golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
 )
diff --git a/go.sum b/go.sum
index 76619d7..3c1b3e6 100644
--- a/go.sum
+++ b/go.sum
@@ -10,13 +10,20 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/deepmap/oapi-codegen v1.3.6 h1:Wj44p9A0V0PJ+AUg0BWdyGcsS1LY18U+0rCuPQgK0+o=
+github.com/deepmap/oapi-codegen v1.3.6/go.mod h1:aBozjEveG+33xPiP55Iw/XbVkhtZHEGLq3nxlX0+hfU=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/getkin/kin-openapi v0.2.0/go.mod h1:V1z9xl9oF5Wt7v32ne4FmiF1alpS4dM6mNzoywPOXlk=
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
@@ -29,13 +36,22 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
 github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI=
+github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/influxdata/influxdb-client-go v1.1.0 h1:ht1HvNAfBuwY9/H0i1tOiVH4vHpkZ3gFwYD5j/xvyFA=
+github.com/influxdata/influxdb-client-go v1.1.0/go.mod h1:ZVjaPW87aKp5hzyny2WVpWVF0UY+iqtPz9veOZ2T1zw=
+github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
+github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
@@ -47,6 +63,18 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/labstack/echo/v4 v4.1.11 h1:z0BZoArY4FqdpUEl+wlHp4hnr/oSR6MTmQmv8OHSoww=
+github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g=
+github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0=
+github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
+github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ=
+github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
+github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
+github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
+github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
+github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10=
+github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
 github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -62,6 +90,8 @@ github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
@@ -79,6 +109,8 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R
 github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
 github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8=
 github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
+github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ=
+github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
 github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
@@ -88,6 +120,11 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
+github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
+github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
+github.com/valyala/fasttemplate v1.1.0 h1:RZqt0yGBsps8NGvLSGW804QQqCUYYLsaOjTVHy1Ocw4=
+github.com/valyala/fasttemplate v1.1.0/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
 go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
 go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
 go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
@@ -99,6 +136,9 @@ go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708 h1:pXVtWnwHkrWD9ru3sDxY/qFK/bfc0egRovX91EjWjf4=
+golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
@@ -110,6 +150,8 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190923162816-aa69164e4478 h1:l5EDrHhldLYb3ZRHDUhXF7Om7MvYXnkV9/iQNo1lX6g=
 golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191112182307-2180aed22343 h1:00ohfJ4K98s3m6BGUoBd8nyfp4Yl0GoIKvw5abItTjI=
+golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -121,9 +163,13 @@ golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -135,6 +181,8 @@ golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgw
 golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5 h1:hKsoRgsbwY1NafxrwTs+k64bikrLBkAgPir1TNCj3Zs=
 golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f h1:kDxGY2VmgABOe55qheT/TFqUMtcTHnomIPS1iv3G4Ms=
+golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/runtime/env.go b/runtime/env.go
new file mode 100644
index 0000000..e15f780
--- /dev/null
+++ b/runtime/env.go
@@ -0,0 +1,19 @@
+package runtime
+
+const (
+	EnvTestBranch             = "TEST_BRANCH"
+	EnvTestCase               = "TEST_CASE"
+	EnvTestGroupID            = "TEST_GROUP_ID"
+	EnvTestGroupInstanceCount = "TEST_GROUP_INSTANCE_COUNT"
+	EnvTestInstanceCount      = "TEST_INSTANCE_COUNT"
+	EnvTestInstanceParams     = "TEST_INSTANCE_PARAMS"
+	EnvTestInstanceRole       = "TEST_INSTANCE_ROLE"
+	EnvTestOutputsPath        = "TEST_OUTPUTS_PATH"
+	EnvTestPlan               = "TEST_PLAN"
+	EnvTestRepo               = "TEST_REPO"
+	EnvTestRun                = "TEST_RUN"
+	EnvTestSidecar            = "TEST_SIDECAR"
+	EnvTestStartTime          = "TEST_START_TIME"
+	EnvTestSubnet             = "TEST_SUBNET"
+	EnvTestTag                = "TEST_TAG"
+)
diff --git a/runtime/output.go b/runtime/events.go
similarity index 54%
rename from runtime/output.go
rename to runtime/events.go
index 9b3cd47..f06bb5c 100644
--- a/runtime/output.go
+++ b/runtime/events.go
@@ -16,7 +16,6 @@ type (
 const (
 	EventTypeStart   = EventType("start")
 	EventTypeMessage = EventType("message")
-	EventTypeMetric  = EventType("metric")
 	EventTypeFinish  = EventType("finish")
 
 	EventOutcomeOK      = EventOutcome("ok")
@@ -30,21 +29,9 @@ type Event struct {
 	Error      string       `json:"error,omitempty"`
 	Stacktrace string       `json:"stacktrace,omitempty"`
 	Message    string       `json:"message,omitempty"`
-	Metric     *MetricValue `json:"metric,omitempty"`
 	Runenv     *RunParams   `json:"runenv,omitempty"`
 }
 
-type MetricDefinition struct {
-	Name           string `json:"name"`
-	Unit           string `json:"unit"`
-	ImprovementDir int    `json:"dir"`
-}
-
-type MetricValue struct {
-	MetricDefinition
-	Value float64 `json:"value"`
-}
-
 func (e Event) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 	oe.AddString("type", string(e.Type))
 
@@ -60,11 +47,6 @@ func (e Event) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 	if e.Message != "" {
 		oe.AddString("message", e.Message)
 	}
-	if e.Metric != nil {
-		if err := oe.AddObject("metric", e.Metric); err != nil {
-			return err
-		}
-	}
 	if e.Runenv != nil {
 		if err := oe.AddObject("runenv", e.Runenv); err != nil {
 			return err
@@ -74,46 +56,35 @@ func (e Event) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 	return nil
 }
 
-func (m MetricValue) MarshalLogObject(oe zapcore.ObjectEncoder) error {
-	if m.Name == "" {
-		return nil
-	}
-	oe.AddString("name", m.Name)
-	oe.AddString("unit", m.Unit)
-	oe.AddInt("dir", m.ImprovementDir)
-	oe.AddFloat64("value", m.Value)
-	return nil
-}
-
-func (r *RunParams) MarshalLogObject(oe zapcore.ObjectEncoder) error {
-	oe.AddString("plan", r.TestPlan)
-	oe.AddString("case", r.TestCase)
-	if err := oe.AddReflected("params", r.TestInstanceParams); err != nil {
+func (rp *RunParams) MarshalLogObject(oe zapcore.ObjectEncoder) error {
+	oe.AddString("plan", rp.TestPlan)
+	oe.AddString("case", rp.TestCase)
+	if err := oe.AddReflected("params", rp.TestInstanceParams); err != nil {
 		return err
 	}
-	oe.AddInt("instances", r.TestInstanceCount)
-	oe.AddString("outputs_path", r.TestOutputsPath)
+	oe.AddInt("instances", rp.TestInstanceCount)
+	oe.AddString("outputs_path", rp.TestOutputsPath)
 	oe.AddString("network", func() string {
-		if r.TestSubnet == nil {
+		if rp.TestSubnet == nil {
 			return ""
 		}
-		return r.TestSubnet.Network()
+		return rp.TestSubnet.Network()
 	}())
 
-	oe.AddString("group", r.TestGroupID)
-	oe.AddInt("group_instances", r.TestGroupInstanceCount)
+	oe.AddString("group", rp.TestGroupID)
+	oe.AddInt("group_instances", rp.TestGroupInstanceCount)
 
-	if r.TestRepo != "" {
-		oe.AddString("repo", r.TestRepo)
+	if rp.TestRepo != "" {
+		oe.AddString("repo", rp.TestRepo)
 	}
-	if r.TestCommit != "" {
-		oe.AddString("commit", r.TestCommit)
+	if rp.TestCommit != "" {
+		oe.AddString("commit", rp.TestCommit)
 	}
-	if r.TestBranch != "" {
-		oe.AddString("branch", r.TestBranch)
+	if rp.TestBranch != "" {
+		oe.AddString("branch", rp.TestBranch)
 	}
-	if r.TestTag != "" {
-		oe.AddString("tag", r.TestTag)
+	if rp.TestTag != "" {
+		oe.AddString("tag", rp.TestTag)
 	}
 	return nil
 }
@@ -170,31 +141,3 @@ func (l *logger) RecordCrash(err interface{}) {
 	}
 	l.logger.Error("", zap.Object("event", evt))
 }
-
-// RecordMetric records a metric event associated with the provided metric
-// definition, giving it value `value`.
-func (l *logger) RecordMetric(metric *MetricDefinition, value float64) {
-	evt := Event{
-		Type: EventTypeMetric,
-		Metric: &MetricValue{
-			MetricDefinition: *metric,
-			Value:            value,
-		},
-	}
-	l.logger.Info("", zap.Object("event", evt))
-}
-
-// Message prints out an informational message.
-//
-// Deprecated: use RecordMessage.
-func (r *RunEnv) Message(msg string, a ...interface{}) {
-	r.RecordMessage(msg, a...)
-}
-
-// EmitMetric outputs a metric event associated with the provided metric
-// definition, giving it value `value`.
-//
-// Deprecated: use RecordMetric.
-func (r *RunEnv) EmitMetric(metric *MetricDefinition, value float64) {
-	r.RecordMetric(metric, value)
-}
diff --git a/runtime/files.go b/runtime/files.go
index 1af2f86..377302f 100644
--- a/runtime/files.go
+++ b/runtime/files.go
@@ -3,7 +3,6 @@ package runtime
 import (
 	"bufio"
 	"crypto/rand"
-	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
@@ -13,6 +12,53 @@ import (
 	"go.uber.org/zap/zapcore"
 )
 
+// CreateRawAsset creates an output asset.
+//
+// Output assets will be saved when the test terminates and available for
+// further investigation. You can also manually create output assets/directories
+// under re.TestOutputsPath.
+func (re *RunEnv) CreateRawAsset(name string) (*os.File, error) {
+	file, err := os.Create(filepath.Join(re.TestOutputsPath, name))
+	if err != nil {
+		return nil, err
+	}
+
+	re.unstructured.ch <- file
+
+	return file, nil
+}
+
+// CreateStructuredAsset creates an output asset and wraps it in zap loggers.
+func (re *RunEnv) CreateStructuredAsset(name string, config zap.Config) (*zap.Logger, *zap.SugaredLogger, error) {
+	path := filepath.Join(re.TestOutputsPath, name)
+	config.OutputPaths = []string{path}
+
+	logger, err := config.Build()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	re.structured.ch <- logger
+
+	return logger, logger.Sugar(), nil
+}
+
+// StandardJSONConfig returns a zap.Config with JSON encoding, debug verbosity,
+// caller and stacktraces disabled, and with timestamps encoded as nanos after
+// epoch.
+func StandardJSONConfig() zap.Config {
+	enc := zap.NewProductionEncoderConfig()
+	enc.EncodeTime = zapcore.EpochNanosTimeEncoder
+
+	return zap.Config{
+		Level:             zap.NewAtomicLevelAt(zap.DebugLevel),
+		Encoding:          "json",
+		EncoderConfig:     enc,
+		DisableCaller:     true,
+		DisableStacktrace: true,
+	}
+}
+
 // CreateRandomFile creates a file of the specified size (in bytes) within the
 // specified directory path and returns its path.
 func (re *RunEnv) CreateRandomFile(directoryPath string, size int64) (string, error) {
@@ -63,58 +109,3 @@ func (re *RunEnv) CreateRandomDirectory(directoryPath string, depth uint) (strin
 
 	return base, nil
 }
-
-// CreateRawAsset creates an output asset.
-//
-// Output assets will be saved when the test terminates and available for
-// further investigation. You can also manually create output assets/directories
-// under re.TestOutputsPath.
-func (re *RunEnv) CreateRawAsset(name string) (*os.File, error) {
-	file, err := os.Create(filepath.Join(re.TestOutputsPath, name))
-	if err != nil {
-		return nil, err
-	}
-
-	select {
-	case re.unstructured <- file:
-	default:
-		return nil, fmt.Errorf("too many unstructured assets; current: %d", len(re.unstructured))
-	}
-
-	return file, nil
-}
-
-// CreateStructuredAsset creates an output asset and wraps it in zap loggers.
-func (re *RunEnv) CreateStructuredAsset(name string, config zap.Config) (*zap.Logger, *zap.SugaredLogger, error) {
-	path := filepath.Join(re.TestOutputsPath, name)
-	config.OutputPaths = []string{path}
-
-	logger, err := config.Build()
-	if err != nil {
-		return nil, nil, err
-	}
-
-	select {
-	case re.structured <- logger:
-	default:
-		return nil, nil, fmt.Errorf("too many structured assets; current: %d", len(re.structured))
-	}
-
-	return logger, logger.Sugar(), nil
-}
-
-// StandardJSONConfig returns a zap.Config with JSON encoding, debug verbosity,
-// caller and stacktraces disabled, and with timestamps encoded as nanos after
-// epoch.
-func StandardJSONConfig() zap.Config {
-	enc := zap.NewProductionEncoderConfig()
-	enc.EncodeTime = zapcore.EpochNanosTimeEncoder
-
-	return zap.Config{
-		Level:             zap.NewAtomicLevelAt(zap.DebugLevel),
-		Encoding:          "json",
-		EncoderConfig:     enc,
-		DisableCaller:     true,
-		DisableStacktrace: true,
-	}
-}
diff --git a/runtime/influxdb.go b/runtime/influxdb.go
new file mode 100644
index 0000000..0771d4b
--- /dev/null
+++ b/runtime/influxdb.go
@@ -0,0 +1,51 @@
+package runtime
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+
+	influxdb2 "github.com/influxdata/influxdb-client-go"
+)
+
+// Names of the environment variables consulted by NewInfluxDBClient.
+const (
+	EnvInfluxDBURL       = "INFLUXDB_URL"
+	EnvInfluxDBAuthToken = "INFLUXDB_AUTH"
+)
+
+// NewInfluxDBClient builds an InfluxDB client from the environment and
+// checks that the server reports ready before returning it.
+//
+// It returns an error if $INFLUXDB_URL is unset or empty, if the readiness
+// probe fails, or if the probe succeeds but the server is not ready.
+func NewInfluxDBClient() (influxdb2.InfluxDBClient, error) {
+	url := os.Getenv(EnvInfluxDBURL)
+	if url == "" {
+		return nil, fmt.Errorf("no InfluxDB URL in $%s env var", EnvInfluxDBURL)
+	}
+
+	auth := os.Getenv(EnvInfluxDBAuthToken)
+
+	opts := influxdb2.DefaultOptions()
+	opts.SetMaxRetries(10)
+	opts.SetHttpRequestTimeout(30)
+	opts.SetUseGZip(true)
+
+	client := influxdb2.NewClientWithOptions(url, auth, opts)
+	// Give the readiness probe at most 10 seconds.
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	ok, err := client.Ready(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("influxdb not ready: %w", err)
+	}
+	if !ok {
+		// There is no underlying error to wrap here; formatting a nil error
+		// with %w would print "%!w(<nil>)".
+		return nil, fmt.Errorf("influxdb not ready: readiness check returned false")
+	}
+	return client, nil
+}
diff --git a/runtime/logger.go b/runtime/logger.go
index bbe20e2..8c7668e 100644
--- a/runtime/logger.go
+++ b/runtime/logger.go
@@ -11,7 +11,6 @@ import (
 type logger struct {
 	runenv *RunParams
 
-	// TODO: we'll want different kinds of loggers.
 	logger  *zap.Logger
 	slogger *zap.SugaredLogger
 }
@@ -73,8 +72,3 @@ func (l *logger) init() {
 func (l *logger) SLogger() *zap.SugaredLogger {
 	return l.slogger
 }
-
-// Loggers returns the loggers populated from this runenv.
-func (l *logger) Loggers() (*zap.Logger, *zap.SugaredLogger) {
-	return l.logger, l.slogger
-}
diff --git a/runtime/metrics.go b/runtime/metrics.go
index a3a46a6..fa4d0fd 100644
--- a/runtime/metrics.go
+++ b/runtime/metrics.go
@@ -1,171 +1,169 @@
 package runtime
 
 import (
-	"context"
-	"io"
-	"net/http"
-	"os"
-	"path"
-	"strconv"
+	"sync"
 	"time"
 
-	"github.com/prometheus/client_golang/prometheus"
+	"github.com/rcrowley/go-metrics"
 )
 
 // Type aliases to hide implementation details in the APIs.
 type (
-	Counter   = prometheus.Counter
-	Gauge     = prometheus.Gauge
-	Histogram = prometheus.Histogram
-	Summary   = prometheus.Summary
-
-	CounterOpts   = prometheus.CounterOpts
-	GaugeOpts     = prometheus.GaugeOpts
-	HistogramOpts = prometheus.HistogramOpts
-	SummaryOpts   = prometheus.SummaryOpts
-
-	CounterVec   = prometheus.CounterVec
-	GaugeVec     = prometheus.GaugeVec
-	HistogramVec = prometheus.HistogramVec
-	SummaryVec   = prometheus.SummaryVec
+	Counter   = metrics.Counter
+	EWMA      = metrics.EWMA
+	Gauge     = metrics.GaugeFloat64
+	Histogram = metrics.Histogram
+	Meter     = metrics.Meter
+	Sample    = metrics.Sample
+	Timer     = metrics.Timer
+	Point     float64
 )
 
-type Metrics struct {
-	runenv *RunEnv
+type SinkFn func(m *Metric) error
+
+type MetricsApi struct {
+	// re is the RunEnv this MetricsApi object is attached to.
+	re *RunEnv
+
+	// reg is the go-metrics Registry this MetricsApi object creates metrics under.
+	reg metrics.Registry
+
+	// sinks to invoke when a new observation has been made.
+	//  1) data points are sent immediately.
+	//  2) aggregated metrics are sent periodically, based on freq.
+	sinks []SinkFn
+
+	// freq is the frequency with which to materialize aggregated metrics.
+	freq time.Duration
+
+	wg           sync.WaitGroup
+	freqChangeCh chan time.Duration
+	doneCh       chan struct{}
 }
 
-func (*Metrics) NewCounter(o CounterOpts) Counter {
-	m := prometheus.NewCounter(o)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
-	}
-	return m
+type metricsApiOpts struct {
+	prefix string
+	freq   time.Duration
+	sinks  []SinkFn
 }
 
-func (*Metrics) NewGauge(o GaugeOpts) Gauge {
-	m := prometheus.NewGauge(o)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
+func newMetricsApi(re *RunEnv, opts metricsApiOpts) *MetricsApi {
+	m := &MetricsApi{
+		re:           re,
+		reg:          metrics.NewPrefixedRegistry(opts.prefix),
+		sinks:        opts.sinks,
+		freq:         opts.freq,
+		freqChangeCh: make(chan time.Duration),
+		doneCh:       make(chan struct{}),
 	}
+
+	m.wg.Add(1)
+	go m.background()
 	return m
 }
 
-func (*Metrics) NewHistogram(o HistogramOpts) Histogram {
-	m := prometheus.NewHistogram(o)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
+func (m *MetricsApi) background() {
+	var (
+		tick *time.Ticker
+		c    <-chan time.Time
+	)
+
+	defer m.wg.Done()
+
+	// resetTicker resets the ticker to a new frequency.
+	resetTicker := func(d time.Duration) {
+		if tick != nil {
+			tick.Stop()
+			tick = nil
+			c = nil
+		}
+		if d <= 0 {
+			return
+		}
+		tick = time.NewTicker(d)
+		c = tick.C
 	}
-	return m
-}
 
-func (*Metrics) NewSummary(o SummaryOpts) Summary {
-	m := prometheus.NewSummary(o)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
+	// Will stop and nullify the ticker.
+	defer resetTicker(0)
+
+	// Set the initial tick frequency.
+	resetTicker(m.freq)
+
+	for {
+		select {
+		case <-c:
+			m.reg.Each(m.broadcast)
+
+		case f := <-m.freqChangeCh:
+			m.freq = f
+			resetTicker(f)
+
+		case <-m.doneCh:
+			return
+		}
 	}
-	return m
 }
 
-func (*Metrics) NewCounterVec(o CounterOpts, labels ...string) *CounterVec {
-	m := prometheus.NewCounterVec(o, labels)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
+// broadcast sends an observation to all registered sinks.
+func (m *MetricsApi) broadcast(name string, obj interface{}) {
+	metric := NewMetric(name, obj)
+	defer metric.Release()
+
+	for _, sink := range m.sinks {
+		if err := sink(metric); err != nil {
+			m.re.RecordMessage("failed to emit aggregated metric: %s", err)
+		}
 	}
-	return m
 }
 
-func (*Metrics) NewGaugeVec(o GaugeOpts, labels ...string) *GaugeVec {
-	m := prometheus.NewGaugeVec(o, labels)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
-	}
-	return m
+func (m *MetricsApi) Close() error {
+	close(m.doneCh)
+	m.wg.Wait()
+
+	return nil
 }
 
-func (*Metrics) NewHistogramVec(o HistogramOpts, labels ...string) *HistogramVec {
-	m := prometheus.NewHistogramVec(o, labels)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
-	}
-	return m
+func (m *MetricsApi) SetFrequency(freq time.Duration) {
+	m.freqChangeCh <- freq
 }
 
-func (*Metrics) NewSummaryVec(o SummaryOpts, labels ...string) *SummaryVec {
-	m := prometheus.NewSummaryVec(o, labels)
-	switch err := prometheus.Register(m); err.(type) {
-	case nil, prometheus.AlreadyRegisteredError:
-	default:
-		panic(err)
-	}
-	return m
+func (m *MetricsApi) RecordPoint(name string, value float64) {
+	m.broadcast(name, Point(value))
 }
 
-// HTTPPeriodicSnapshots periodically fetches the snapshots from the given address
-// and outputs them to the out directory. Every file will be in the format timestamp.out.
-func (re *RunEnv) HTTPPeriodicSnapshots(ctx context.Context, addr string, dur time.Duration, outDir string) error {
-	err := os.MkdirAll(path.Join(re.TestOutputsPath, outDir), 0777)
-	if err != nil {
-		return err
-	}
+func (m *MetricsApi) NewCounter(name string) Counter {
+	return m.reg.GetOrRegister(name, metrics.NewCounter()).(metrics.Counter)
+}
 
-	nextFile := func() (*os.File, error) {
-		timestamp := strconv.FormatInt(time.Now().Unix(), 10)
-		return os.Create(path.Join(re.TestOutputsPath, outDir, timestamp+".out"))
-	}
+func (m *MetricsApi) NewEWMA(name string, alpha float64) EWMA {
+	return m.reg.GetOrRegister(name, metrics.NewEWMA(alpha)).(metrics.EWMA)
+}
 
-	go func() {
-		ticker := time.NewTicker(dur)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				func() {
-					req, err := http.NewRequestWithContext(ctx, "GET", addr, nil)
-					if err != nil {
-						re.RecordMessage("error while creating http request: %v", err)
-						return
-					}
-
-					resp, err := http.DefaultClient.Do(req)
-					if err != nil {
-						re.RecordMessage("error while scraping http endpoint: %v", err)
-						return
-					}
-					defer resp.Body.Close()
-
-					file, err := nextFile()
-					if err != nil {
-						re.RecordMessage("error while getting metrics output file: %v", err)
-						return
-					}
-					defer file.Close()
-
-					_, err = io.Copy(file, resp.Body)
-					if err != nil {
-						re.RecordMessage("error while copying data to file: %v", err)
-						return
-					}
-				}()
-			}
-		}
-	}()
+func (m *MetricsApi) NewGauge(name string) Gauge {
+	return m.reg.GetOrRegister(name, metrics.NewGaugeFloat64()).(metrics.GaugeFloat64)
+}
 
-	return nil
+func (m *MetricsApi) NewFunctionalGauge(name string, f func() float64) Gauge {
+	return m.reg.GetOrRegister(name, metrics.NewFunctionalGaugeFloat64(f)).(metrics.GaugeFloat64)
+}
+
+func (m *MetricsApi) NewHistogram(name string, s Sample) Histogram {
+	return m.reg.GetOrRegister(name, metrics.NewHistogram(s)).(metrics.Histogram)
+}
+
+func (m *MetricsApi) NewMeter(name string) Meter {
+	return m.reg.GetOrRegister(name, metrics.NewMeter()).(metrics.Meter)
+}
+
+func (m *MetricsApi) NewTimer(name string) Timer {
+	return m.reg.GetOrRegister(name, metrics.NewTimer()).(metrics.Timer)
+}
+
+func (m *MetricsApi) NewExpDecaySample(reservoirSize int, alpha float64) Sample {
+	return metrics.NewExpDecaySample(reservoirSize, alpha)
+}
+
+func (m *MetricsApi) NewUniformSample(reservoirSize int) Sample {
+	return metrics.NewUniformSample(reservoirSize)
 }
diff --git a/runtime/metrics_types.go b/runtime/metrics_types.go
new file mode 100644
index 0000000..2659cd6
--- /dev/null
+++ b/runtime/metrics_types.go
@@ -0,0 +1,134 @@
+package runtime
+
+import (
+	"sync"
+	"time"
+)
+
+type MetricType int
+
+const (
+	MetricPoint MetricType = iota
+	MetricCounter
+	MetricEWMA
+	MetricGauge
+	MetricHistogram
+	MetricMeter
+	MetricTimer
+)
+
+func (mt MetricType) String() string {
+	return [...]string{"point", "counter", "ewma", "gauge", "histogram", "meter", "timer"}[mt]
+}
+
+var pools = func() (p [7]sync.Pool) {
+	for i := range p {
+		// One pool per MetricType; copy i so the closure doesn't share the loop variable (go 1.14).
+		t := MetricType(i)
+		p[i].New = func() interface{} { return &Metric{Type: t, Measures: make(map[string]interface{}, 1)} }
+	}
+	return p
+}()
+
+type Metric struct {
+	Timestamp int64                  `json:"ts"`
+	Type      MetricType             `json:"t"`
+	Name      string                 `json:"n"`
+	Measures  map[string]interface{} `json:"m"`
+}
+
+func (m *Metric) Release() {
+	pools[m.Type].Put(m)
+}
+
+func NewMetric(name string, i interface{}) *Metric {
+	var (
+		m  *Metric
+		t  MetricType
+		ts = time.Now().UnixNano()
+	)
+
+	switch v := i.(type) {
+	case Point:
+		t = MetricPoint
+		m = pools[t].Get().(*Metric)
+		m.Measures["value"] = v
+
+	case Counter:
+		t = MetricCounter
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		m.Measures["count"] = s.Count()
+
+	case EWMA:
+		t = MetricEWMA
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		m.Measures["rate"] = s.Rate()
+
+	case Gauge:
+		t = MetricGauge
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		m.Measures["value"] = s.Value()
+
+	case Histogram:
+		t = MetricHistogram
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		p := s.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
+		m.Measures["count"] = float64(s.Count())
+		m.Measures["max"] = float64(s.Max())
+		m.Measures["mean"] = s.Mean()
+		m.Measures["min"] = float64(s.Min())
+		m.Measures["stddev"] = s.StdDev()
+		m.Measures["variance"] = s.Variance()
+		m.Measures["p50"] = p[0]
+		m.Measures["p75"] = p[1]
+		m.Measures["p95"] = p[2]
+		m.Measures["p99"] = p[3]
+		m.Measures["p999"] = p[4]
+		m.Measures["p9999"] = p[5]
+
+	case Meter:
+		t = MetricMeter
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		m.Measures["count"] = float64(s.Count())
+		m.Measures["m1"] = s.Rate1()
+		m.Measures["m5"] = s.Rate5()
+		m.Measures["m15"] = s.Rate15()
+		m.Measures["mean"] = s.RateMean()
+
+	case Timer:
+		t = MetricTimer
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		p := s.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
+		m.Measures["count"] = float64(s.Count())
+		m.Measures["max"] = float64(s.Max())
+		m.Measures["mean"] = s.Mean()
+		m.Measures["min"] = float64(s.Min())
+		m.Measures["stddev"] = s.StdDev()
+		m.Measures["variance"] = s.Variance()
+		m.Measures["p50"] = p[0]
+		m.Measures["p75"] = p[1]
+		m.Measures["p95"] = p[2]
+		m.Measures["p99"] = p[3]
+		m.Measures["p999"] = p[4]
+		m.Measures["p9999"] = p[5]
+		m.Measures["m1"] = s.Rate1()
+		m.Measures["m5"] = s.Rate5()
+		m.Measures["m15"] = s.Rate15()
+		m.Measures["meanrate"] = s.RateMean()
+
+	default:
+		panic("unexpected metric type")
+
+	}
+
+	m.Timestamp = ts
+	m.Type = t
+	m.Name = name
+	return m
+}
diff --git a/runtime/runenv.go b/runtime/runenv.go
index 05add22..9cbd8da 100644
--- a/runtime/runenv.go
+++ b/runtime/runenv.go
@@ -1,219 +1,153 @@
 package runtime
 
 import (
-	"encoding/json"
-	"fmt"
-	"net"
 	"os"
-	"strconv"
-	"strings"
+	"sync"
 	"time"
 
-	"github.com/dustin/go-humanize"
+	"github.com/hashicorp/go-multierror"
+	influxdb2 "github.com/influxdata/influxdb-client-go"
 	"go.uber.org/zap"
 )
 
-const (
-	EnvTestBranch             = "TEST_BRANCH"
-	EnvTestCase               = "TEST_CASE"
-	EnvTestGroupID            = "TEST_GROUP_ID"
-	EnvTestGroupInstanceCount = "TEST_GROUP_INSTANCE_COUNT"
-	EnvTestInstanceCount      = "TEST_INSTANCE_COUNT"
-	EnvTestInstanceParams     = "TEST_INSTANCE_PARAMS"
-	EnvTestInstanceRole       = "TEST_INSTANCE_ROLE"
-	EnvTestOutputsPath        = "TEST_OUTPUTS_PATH"
-	EnvTestPlan               = "TEST_PLAN"
-	EnvTestRepo               = "TEST_REPO"
-	EnvTestRun                = "TEST_RUN"
-	EnvTestSidecar            = "TEST_SIDECAR"
-	EnvTestStartTime          = "TEST_START_TIME"
-	EnvTestSubnet             = "TEST_SUBNET"
-	EnvTestTag                = "TEST_TAG"
-)
-
-type IPNet struct {
-	net.IPNet
-}
-
-func (i IPNet) MarshalJSON() ([]byte, error) {
-	if len(i.IPNet.IP) == 0 {
-		return json.Marshal("")
-	}
-	return json.Marshal(i.String())
-}
-
-func (i *IPNet) UnmarshalJSON(data []byte) error {
-	var s string
-	if err := json.Unmarshal(data, &s); err != nil {
-		return err
-	}
-
-	if s == "" {
-		return nil
-	}
-
-	_, ipnet, err := net.ParseCIDR(s)
-	if err != nil {
-		return err
-	}
-
-	i.IPNet = *ipnet
-	return nil
-}
-
-// RunParams encapsulates the runtime parameters for this test.
-type RunParams struct {
-	TestPlan string `json:"plan"`
-	TestCase string `json:"case"`
-	TestRun  string `json:"run"`
-
-	TestRepo   string `json:"repo,omitempty"`
-	TestCommit string `json:"commit,omitempty"`
-	TestBranch string `json:"branch,omitempty"`
-	TestTag    string `json:"tag,omitempty"`
-
-	TestOutputsPath string `json:"outputs_path,omitempty"`
-
-	TestInstanceCount  int               `json:"instances"`
-	TestInstanceRole   string            `json:"role,omitempty"`
-	TestInstanceParams map[string]string `json:"params,omitempty"`
-
-	TestGroupID            string `json:"group,omitempty"`
-	TestGroupInstanceCount int    `json:"group_instances,omitempty"`
-
-	// true if the test has access to the sidecar.
-	TestSidecar bool `json:"test_sidecar,omitempty"`
-
-	// The subnet on which this test is running.
-	//
-	// The test instance can use this to pick an IP address and/or determine
-	// the "data" network interface.
-	//
-	// This will be 127.1.0.0/16 when using the local exec runner.
-	TestSubnet    *IPNet    `json:"network,omitempty"`
-	TestStartTime time.Time `json:"start_time,omitempty"`
-}
-
 // RunEnv encapsulates the context for this test run.
 type RunEnv struct {
 	RunParams
 	*logger
 
-	metrics      *Metrics
-	unstructured chan *os.File
-	structured   chan *zap.Logger
+	diagnostics *MetricsApi
+	results     *MetricsApi
+	influxdb    influxdb2.InfluxDBClient
+	wapi        influxdb2.WriteApi
+
+	wg        sync.WaitGroup
+	closeCh   chan struct{}
+	assetsErr error
+
+	unstructured struct {
+		files []*os.File
+		ch    chan *os.File
+	}
+	structured struct {
+		loggers []*zap.Logger
+		ch      chan *zap.Logger
+	}
 }
 
 // NewRunEnv constructs a runtime environment from the given runtime parameters.
 func NewRunEnv(params RunParams) *RunEnv {
 	re := &RunEnv{
 		RunParams: params,
-
-		structured:   make(chan *zap.Logger, 32),
-		unstructured: make(chan *os.File, 32),
+		logger:    newLogger(&params),
 	}
 
-	re.metrics = &Metrics{re}
-	re.logger = newLogger(&re.RunParams)
-	return re
-}
+	re.closeCh = make(chan struct{}) // must exist before the goroutines below select on it; Close closes it.
+	re.structured.ch, re.unstructured.ch = make(chan *zap.Logger), make(chan *os.File)
 
-// M returns an object that groups the metrics facilities.
-func (re *RunEnv) M() *Metrics {
-	return re.metrics
-}
+	re.wg.Add(1)
+	go re.manageAssets()
 
-func (re *RunEnv) Close() error {
-	close(re.structured)
-	close(re.unstructured)
+	var dsinks = []SinkFn{LogSinkJSON(re, "diagnostics.out")}
+	client, err := NewInfluxDBClient()
+	if err == nil {
+		re.influxdb = client
+		re.wapi = client.WriteApi("testground", "diagnostics") // stored on re: monitorInfluxDBErrors and Close read it.
+		dsinks = append(dsinks, WriteToInfluxDB(re, re.wapi))
 
-	if l := re.logger; l != nil {
-		_ = l.SLogger().Sync()
+		re.wg.Add(1)
+		go re.monitorInfluxDBErrors()
+	} else {
+		re.logger.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
 	}
 
-	for l := range re.structured {
-		_ = l.Sync() // ignore errors.
-	}
+	re.diagnostics = newMetricsApi(re, metricsApiOpts{
+		prefix: "diag.",
+		freq:   1 * time.Second,
+		sinks:  dsinks,
+	})
 
-	for f := range re.unstructured {
-		_ = f.Close() // ignore errors.
-	}
-	return nil
+	re.results = newMetricsApi(re, metricsApiOpts{
+		prefix: "results.",
+		freq:   1 * time.Second,
+		sinks:  []SinkFn{LogSinkJSON(re, "results.out")},
+	})
+
+	return re
 }
 
-func (re *RunParams) ToEnvVars() map[string]string {
-	packParams := func(in map[string]string) string {
-		arr := make([]string, 0, len(in))
-		for k, v := range in {
-			arr = append(arr, k+"="+v)
+// R returns a metrics object for results.
+func (re *RunEnv) R() *MetricsApi {
+	return re.results
+}
+
+// D returns a metrics object for diagnostics.
+func (re *RunEnv) D() *MetricsApi {
+	return re.diagnostics
+}
+
+func (re *RunEnv) monitorInfluxDBErrors() {
+	defer re.wg.Done()
+
+	for {
+		select {
+		case err := <-re.wapi.Errors():
+			if err == nil {
+				continue
+			}
+			re.RecordMessage("failed while writing to InfluxDB: %s", err)
+		case <-re.closeCh:
+			return
 		}
-		return strings.Join(arr, "|")
 	}
+}
 
-	out := map[string]string{
-		EnvTestBranch:             re.TestBranch,
-		EnvTestCase:               re.TestCase,
-		EnvTestGroupID:            re.TestGroupID,
-		EnvTestGroupInstanceCount: strconv.Itoa(re.TestGroupInstanceCount),
-		EnvTestInstanceCount:      strconv.Itoa(re.TestInstanceCount),
-		EnvTestInstanceParams:     packParams(re.TestInstanceParams),
-		EnvTestInstanceRole:       re.TestInstanceRole,
-		EnvTestOutputsPath:        re.TestOutputsPath,
-		EnvTestPlan:               re.TestPlan,
-		EnvTestRepo:               re.TestRepo,
-		EnvTestRun:                re.TestRun,
-		EnvTestSidecar:            strconv.FormatBool(re.TestSidecar),
-		EnvTestStartTime:          re.TestStartTime.Format(time.RFC3339),
-		EnvTestSubnet:             re.TestSubnet.String(),
-		EnvTestTag:                re.TestTag,
-	}
+func (re *RunEnv) manageAssets() {
+	defer re.wg.Done()
 
-	return out
-}
+	var err *multierror.Error
+	defer func() { re.assetsErr = err.ErrorOrNil() }()
 
-func unpackParams(packed string) map[string]string {
-	spltparams := strings.Split(packed, "|")
-	params := make(map[string]string, len(spltparams))
-	for _, s := range spltparams {
-		v := strings.Split(s, "=")
-		if len(v) != 2 {
-			continue
+	for {
+		select {
+		case f := <-re.unstructured.ch:
+			re.unstructured.files = append(re.unstructured.files, f)
+		case l := <-re.structured.ch:
+			re.structured.loggers = append(re.structured.loggers, l)
+		case <-re.closeCh:
+			for _, f := range re.unstructured.files {
+				err = multierror.Append(err, f.Close())
+			}
+			for _, l := range re.structured.loggers {
+				err = multierror.Append(err, l.Sync())
+			}
+			return
 		}
-		params[v[0]] = v[1]
 	}
-	return params
 }
 
-func toInt(s string) int {
-	v, err := strconv.Atoi(s)
-	if err != nil {
-		return -1
+func (re *RunEnv) Close() error {
+	var err *multierror.Error
+	err = multierror.Append(err, re.diagnostics.Close()) // pass err so this error is not overwritten by the next Append.
+	err = multierror.Append(err, re.results.Close())
+
+	if l := re.logger; l != nil {
+		_ = l.SLogger().Sync()
 	}
-	return v
-}
 
-func toBool(s string) bool {
-	v, _ := strconv.ParseBool(s)
-	return v
-}
+	close(re.closeCh)
+	re.wg.Wait()
+	err = multierror.Append(err, re.assetsErr)
 
-// toNet might parse any input, so it is possible to get an error and nil return value
-func toNet(s string) *IPNet {
-	_, ipnet, err := net.ParseCIDR(s)
-	if err != nil {
-		return nil
+	if re.wapi != nil {
+		re.wapi.Flush()
+		re.wapi.Close()
 	}
-	return &IPNet{IPNet: *ipnet}
-}
 
-// Try to parse the time.
-// Failing to do so, return a zero value time
-func toTime(s string) time.Time {
-	t, err := time.Parse(time.RFC3339, s)
-	if err != nil {
-		return time.Time{}
+	if re.influxdb != nil {
+		re.influxdb.Close()
 	}
-	return t
+
+	return err.ErrorOrNil()
 }
 
 // CurrentRunEnv populates a test context from environment vars.
@@ -222,32 +156,6 @@ func CurrentRunEnv() *RunEnv {
 	return re
 }
 
-// ParseRunParams parses a list of environment variables into a RunParams.
-func ParseRunParams(env []string) (*RunParams, error) {
-	m, err := ParseKeyValues(env)
-	if err != nil {
-		return nil, err
-	}
-
-	return &RunParams{
-		TestBranch:             m[EnvTestBranch],
-		TestCase:               m[EnvTestCase],
-		TestGroupID:            m[EnvTestGroupID],
-		TestGroupInstanceCount: toInt(m[EnvTestGroupInstanceCount]),
-		TestInstanceCount:      toInt(m[EnvTestInstanceCount]),
-		TestInstanceParams:     unpackParams(m[EnvTestInstanceParams]),
-		TestInstanceRole:       m[EnvTestInstanceRole],
-		TestOutputsPath:        m[EnvTestOutputsPath],
-		TestPlan:               m[EnvTestPlan],
-		TestRepo:               m[EnvTestRepo],
-		TestRun:                m[EnvTestRun],
-		TestSidecar:            toBool(m[EnvTestSidecar]),
-		TestStartTime:          toTime(EnvTestStartTime),
-		TestSubnet:             toNet(m[EnvTestSubnet]),
-		TestTag:                m[EnvTestTag],
-	}, nil
-}
-
 // ParseRunEnv parses a list of environment variables into a RunEnv.
 func ParseRunEnv(env []string) (*RunEnv, error) {
 	p, err := ParseRunParams(env)
@@ -257,116 +165,3 @@ func ParseRunEnv(env []string) (*RunEnv, error) {
 
 	return NewRunEnv(*p), nil
 }
-
-// IsParamSet checks if a certain parameter is set.
-func (re *RunParams) IsParamSet(name string) bool {
-	_, ok := re.TestInstanceParams[name]
-	return ok
-}
-
-// StringParam returns a string parameter, or "" if the parameter is not set.
-func (re *RunParams) StringParam(name string) string {
-	v, ok := re.TestInstanceParams[name]
-	if !ok {
-		panic(fmt.Errorf("%s was not set", name))
-	}
-	return v
-}
-
-func (re *RunParams) SizeParam(name string) uint64 {
-	v := re.TestInstanceParams[name]
-	m, err := humanize.ParseBytes(v)
-	if err != nil {
-		panic(err)
-	}
-	return m
-}
-
-// IntParam returns an int parameter, or -1 if the parameter is not set or
-// the conversion failed. It panics on error.
-func (re *RunParams) IntParam(name string) int {
-	v, ok := re.TestInstanceParams[name]
-	if !ok {
-		panic(fmt.Errorf("%s was not set", name))
-	}
-
-	i, err := strconv.Atoi(v)
-	if err != nil {
-		panic(err)
-	}
-	return i
-}
-
-// FloatParam returns a float64 parameter, or -1.0 if the parameter is not set or
-// the conversion failed. It panics on error.
-func (re *RunEnv) FloatParam(name string) float64 {
-	v, ok := re.TestInstanceParams[name]
-	if !ok {
-		return -1.0
-	}
-
-	f, err := strconv.ParseFloat(v, 32)
-	if err != nil {
-		panic(err)
-	}
-	return f
-}
-
-// BooleanParam returns the Boolean value of the parameter, or false if not passed
-func (re *RunParams) BooleanParam(name string) bool {
-	s, ok := re.TestInstanceParams[name]
-	return ok && strings.ToLower(s) == "true"
-}
-
-// StringArrayParam returns an array of string parameter, or an empty array
-// if it does not exist. It panics on error.
-func (re *RunParams) StringArrayParam(name string) []string {
-	a := []string{}
-	re.JSONParam(name, &a)
-	return a
-}
-
-// SizeArrayParam returns an array of uint64 elements which represent sizes,
-// in bytes. If the response is nil, then there was an error parsing the input.
-// It panics on error.
-func (re *RunParams) SizeArrayParam(name string) []uint64 {
-	humanSizes := re.StringArrayParam(name)
-	sizes := []uint64{}
-
-	for _, size := range humanSizes {
-		n, err := humanize.ParseBytes(size)
-		if err != nil {
-			panic(err)
-		}
-		sizes = append(sizes, n)
-	}
-
-	return sizes
-}
-
-// JSONParam unmarshals a JSON parameter in an arbitrary interface.
-// It panics on error.
-func (re *RunParams) JSONParam(name string, v interface{}) {
-	s, ok := re.TestInstanceParams[name]
-	if !ok {
-		panic(fmt.Errorf("%s was not set", name))
-	}
-
-	if err := json.Unmarshal([]byte(s), v); err != nil {
-		panic(err)
-	}
-}
-
-// Copied from github.com/ipfs/testground/pkg/conv, because we don't want the
-// SDK to depend on that package.
-func ParseKeyValues(in []string) (res map[string]string, err error) {
-	res = make(map[string]string, len(in))
-	for _, d := range in {
-		splt := strings.Split(d, "=")
-		if len(splt) < 2 {
-			return nil, fmt.Errorf("invalid key-value: %s", d)
-		}
-		res[splt[0]] = strings.Join(splt[1:], "=")
-	}
-	return res, nil
-}
diff --git a/runtime/runner.go b/runtime/runner.go
index b8b7420..c1ca50a 100644
--- a/runtime/runner.go
+++ b/runtime/runner.go
@@ -4,13 +4,12 @@ import (
 	"fmt"
 	"io"
 	"net"
+	"net/http"
+	_ "net/http/pprof"
 	"os"
 	"runtime/debug"
 	"strings"
 
-	"net/http"
-	_ "net/http/pprof"
-
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
 
@@ -88,7 +87,7 @@ func Invoke(tc TestCaseFn) {
 			runenv.RecordCrash(err)
 
 			// Developers expect panics to be recorded in run.err too.
-			fmt.Fprintln(os.Stderr, err)
+			_, _ = fmt.Fprintln(os.Stderr, err)
 			debug.PrintStack()
 		}
 	}()
diff --git a/runtime/runparams.go b/runtime/runparams.go
new file mode 100644
index 0000000..4f3ebae
--- /dev/null
+++ b/runtime/runparams.go
@@ -0,0 +1,289 @@
+package runtime
+
+import (
+	"encoding/json"
+	"fmt"
+	"net"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/dustin/go-humanize"
+)
+
+type IPNet struct {
+	net.IPNet
+}
+
+func (i IPNet) MarshalJSON() ([]byte, error) {
+	if len(i.IPNet.IP) == 0 {
+		return json.Marshal("")
+	}
+	return json.Marshal(i.String())
+}
+
+func (i *IPNet) UnmarshalJSON(data []byte) error {
+	var s string
+	if err := json.Unmarshal(data, &s); err != nil {
+		return err
+	}
+
+	if s == "" {
+		return nil
+	}
+
+	_, ipnet, err := net.ParseCIDR(s)
+	if err != nil {
+		return err
+	}
+
+	i.IPNet = *ipnet
+	return nil
+}
+
+// RunParams encapsulates the runtime parameters for this test.
+type RunParams struct {
+	TestPlan string `json:"plan"`
+	TestCase string `json:"case"`
+	TestRun  string `json:"run"`
+
+	TestRepo   string `json:"repo,omitempty"`
+	TestCommit string `json:"commit,omitempty"`
+	TestBranch string `json:"branch,omitempty"`
+	TestTag    string `json:"tag,omitempty"`
+
+	TestOutputsPath string `json:"outputs_path,omitempty"`
+
+	TestInstanceCount  int               `json:"instances"`
+	TestInstanceRole   string            `json:"role,omitempty"`
+	TestInstanceParams map[string]string `json:"params,omitempty"`
+
+	TestGroupID            string `json:"group,omitempty"`
+	TestGroupInstanceCount int    `json:"group_instances,omitempty"`
+
+	// true if the test has access to the sidecar.
+	TestSidecar bool `json:"test_sidecar,omitempty"`
+
+	// The subnet on which this test is running.
+	//
+	// The test instance can use this to pick an IP address and/or determine
+	// the "data" network interface.
+	//
+	// This will be 127.1.0.0/16 when using the local exec runner.
+	TestSubnet    *IPNet    `json:"network,omitempty"`
+	TestStartTime time.Time `json:"start_time,omitempty"`
+}
+
+// ParseRunParams builds a RunParams from KEY=VALUE environment entries; it fails if ParseKeyValues rejects an entry.
+func ParseRunParams(env []string) (*RunParams, error) {
+	m, err := ParseKeyValues(env)
+	if err != nil {
+		return nil, err
+	}
+
+	return &RunParams{
+		TestBranch:             m[EnvTestBranch],
+		TestCase:               m[EnvTestCase],
+		TestGroupID:            m[EnvTestGroupID],
+		TestGroupInstanceCount: toInt(m[EnvTestGroupInstanceCount]),
+		TestInstanceCount:      toInt(m[EnvTestInstanceCount]),
+		TestInstanceParams:     unpackParams(m[EnvTestInstanceParams]),
+		TestInstanceRole:       m[EnvTestInstanceRole],
+		TestOutputsPath:        m[EnvTestOutputsPath],
+		TestPlan:               m[EnvTestPlan],
+		TestRepo:               m[EnvTestRepo],
+		TestRun:                m[EnvTestRun],
+		TestSidecar:            toBool(m[EnvTestSidecar]),
+		TestStartTime:          toTime(m[EnvTestStartTime]), // parse the variable's value, not its name.
+		TestSubnet:             toNet(m[EnvTestSubnet]),
+		TestTag:                m[EnvTestTag],
+	}, nil
+}
+
+func (rp *RunParams) ToEnvVars() map[string]string {
+	packParams := func(in map[string]string) string {
+		arr := make([]string, 0, len(in))
+		for k, v := range in {
+			arr = append(arr, k+"="+v)
+		}
+		return strings.Join(arr, "|")
+	}
+
+	out := map[string]string{
+		EnvTestBranch:             rp.TestBranch,
+		EnvTestCase:               rp.TestCase,
+		EnvTestGroupID:            rp.TestGroupID,
+		EnvTestGroupInstanceCount: strconv.Itoa(rp.TestGroupInstanceCount),
+		EnvTestInstanceCount:      strconv.Itoa(rp.TestInstanceCount),
+		EnvTestInstanceParams:     packParams(rp.TestInstanceParams),
+		EnvTestInstanceRole:       rp.TestInstanceRole,
+		EnvTestOutputsPath:        rp.TestOutputsPath,
+		EnvTestPlan:               rp.TestPlan,
+		EnvTestRepo:               rp.TestRepo,
+		EnvTestRun:                rp.TestRun,
+		EnvTestSidecar:            strconv.FormatBool(rp.TestSidecar),
+		EnvTestStartTime:          rp.TestStartTime.Format(time.RFC3339),
+		EnvTestSubnet:             rp.TestSubnet.String(),
+		EnvTestTag:                rp.TestTag,
+	}
+
+	return out
+}
+
+// IsParamSet checks if a certain parameter is set.
+func (rp *RunParams) IsParamSet(name string) bool {
+	_, ok := rp.TestInstanceParams[name]
+	return ok
+}
+
+// StringParam returns a string parameter. It panics if the parameter is not set.
+func (rp *RunParams) StringParam(name string) string {
+	v, ok := rp.TestInstanceParams[name]
+	if !ok {
+		panic(fmt.Errorf("%s was not set", name))
+	}
+	return v
+}
+
+func (rp *RunParams) SizeParam(name string) uint64 {
+	v := rp.TestInstanceParams[name]
+	m, err := humanize.ParseBytes(v)
+	if err != nil {
+		panic(err)
+	}
+	return m
+}
+
+// IntParam returns an int parameter. It panics if the parameter is not set or
+// if its value cannot be converted to an int.
+func (rp *RunParams) IntParam(name string) int {
+	v, ok := rp.TestInstanceParams[name]
+	if !ok {
+		panic(fmt.Errorf("%s was not set", name))
+	}
+
+	i, err := strconv.Atoi(v)
+	if err != nil {
+		panic(err)
+	}
+	return i
+}
+
+// FloatParam returns a float64 parameter, or -1.0 if the parameter is not set.
+// It panics if the value cannot be parsed as a float.
+func (rp *RunParams) FloatParam(name string) float64 {
+	v, ok := rp.TestInstanceParams[name]
+	if !ok {
+		return -1.0
+	}
+
+	f, err := strconv.ParseFloat(v, 64) // 64, not 32: the result is a float64 and must not be rounded to float32 precision.
+	if err != nil {
+		panic(err)
+	}
+	return f
+}
+
+// BooleanParam returns the Boolean value of the parameter, or false if not passed
+func (rp *RunParams) BooleanParam(name string) bool {
+	s, ok := rp.TestInstanceParams[name]
+	return ok && strings.ToLower(s) == "true"
+}
+
+// StringArrayParam returns a parameter holding a JSON array of strings. It
+// panics if the parameter is not set or cannot be unmarshalled into a []string.
+func (rp *RunParams) StringArrayParam(name string) []string {
+	var a []string
+	rp.JSONParam(name, &a)
+	return a
+}
+
+// SizeArrayParam returns an array of uint64 elements which represent sizes,
+// in bytes, parsed with humanize.ParseBytes from a JSON array of strings. The
+// result is nil when that array is empty. It panics on error.
+func (rp *RunParams) SizeArrayParam(name string) []uint64 {
+	humanSizes := rp.StringArrayParam(name)
+	var sizes []uint64
+
+	for _, size := range humanSizes {
+		n, err := humanize.ParseBytes(size)
+		if err != nil {
+			panic(err)
+		}
+		sizes = append(sizes, n)
+	}
+
+	return sizes
+}
+
+// JSONParam unmarshals a JSON parameter in an arbitrary interface.
+// It panics on error.
+func (rp *RunParams) JSONParam(name string, v interface{}) {
+	s, ok := rp.TestInstanceParams[name]
+	if !ok {
+		panic(fmt.Errorf("%s was not set", name))
+	}
+
+	if err := json.Unmarshal([]byte(s), v); err != nil {
+		panic(err)
+	}
+}
+
+// Copied from github.com/ipfs/testground/pkg/conv, because we don't want the
+// SDK to depend on that package.
+func ParseKeyValues(in []string) (res map[string]string, err error) {
+	res = make(map[string]string, len(in))
+	for _, d := range in {
+		splt := strings.Split(d, "=")
+		if len(splt) < 2 {
+			return nil, fmt.Errorf("invalid key-value: %s", d)
+		}
+		res[splt[0]] = strings.Join(splt[1:], "=")
+	}
+	return res, nil
+}
+
+// unpackParams parses a "k1=v1|k2=v2" string (the format ToEnvVars packs) into
+// a map, skipping entries without "="; values may themselves contain "=".
+func unpackParams(packed string) map[string]string {
+	spltparams := strings.Split(packed, "|")
+	params := make(map[string]string, len(spltparams))
+	for _, s := range spltparams {
+		if kv := strings.SplitN(s, "=", 2); len(kv) == 2 {
+			params[kv[0]] = kv[1]
+		}
+	}
+	return params
+}
+
+func toInt(s string) int {
+	v, err := strconv.Atoi(s)
+	if err != nil {
+		return -1
+	}
+	return v
+}
+
+func toBool(s string) bool {
+	v, _ := strconv.ParseBool(s)
+	return v
+}
+
+// toNet might parse any input, so it is possible to get an error and nil return value
+func toNet(s string) *IPNet {
+	_, ipnet, err := net.ParseCIDR(s)
+	if err != nil {
+		return nil
+	}
+	return &IPNet{IPNet: *ipnet}
+}
+
+// Try to parse the time.
+// Failing to do so, return a zero value time
+func toTime(s string) time.Time {
+	t, err := time.Parse(time.RFC3339, s)
+	if err != nil {
+		return time.Time{}
+	}
+	return t
+}
diff --git a/runtime/sinks.go b/runtime/sinks.go
new file mode 100644
index 0000000..1d9fefd
--- /dev/null
+++ b/runtime/sinks.go
@@ -0,0 +1,37 @@
+package runtime
+
+import (
+	"encoding/json"
+	"time"
+
+	"github.com/influxdata/influxdb-client-go"
+)
+
+func LogSinkJSON(re *RunEnv, filename string) SinkFn {
+	f, err := re.CreateRawAsset(filename)
+	if err != nil {
+		panic(err)
+	}
+
+	enc := json.NewEncoder(f)
+	return func(m *Metric) error {
+		return enc.Encode(m)
+	}
+}
+
+func WriteToInfluxDB(re *RunEnv, w influxdb2.WriteApi) SinkFn {
+	tags := map[string]string{
+		"plan":     re.TestPlan,
+		"case":     re.TestCase,
+		"run":      re.TestRun,
+		"group_id": re.TestGroupID,
+	}
+
+	return func(m *Metric) error {
+		// NewPoint copies all tags and fields, so this is thread-safe.
+		p := influxdb2.NewPoint(m.Name, tags, m.Measures, time.Unix(0, m.Timestamp))
+		p.AddTag("type", m.Type.String())
+		w.WritePoint(p)
+		return nil
+	}
+}

From d653ac071e8ae76ad64317b6b460edc0c51403df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Tue, 28 Apr 2020 15:37:58 +0100
Subject: [PATCH 02/13] batch write results to InfluxDB on close.

---
 go.mod                                 |  1 +
 go.sum                                 |  2 +
 runtime/{metrics.go => metrics_api.go} |  0
 runtime/runenv.go                      | 76 ++++++++++++++++++++++++--
 4 files changed, 75 insertions(+), 4 deletions(-)
 rename runtime/{metrics.go => metrics_api.go} (100%)

diff --git a/go.mod b/go.mod
index dc987f5..9e83bdb 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/testground/sdk-go
 go 1.14
 
 require (
+	github.com/avast/retry-go v2.6.0+incompatible
 	github.com/dustin/go-humanize v1.0.0
 	github.com/go-redis/redis/v7 v7.2.0
 	github.com/hashicorp/go-multierror v1.1.0
diff --git a/go.sum b/go.sum
index 3c1b3e6..508d916 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/avast/retry-go v2.6.0+incompatible h1:FelcMrm7Bxacr1/RM8+/eqkDkmVN7tjlsy51dOzB3LI=
+github.com/avast/retry-go v2.6.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
diff --git a/runtime/metrics.go b/runtime/metrics_api.go
similarity index 100%
rename from runtime/metrics.go
rename to runtime/metrics_api.go
diff --git a/runtime/runenv.go b/runtime/runenv.go
index 9cbd8da..c4f9dea 100644
--- a/runtime/runenv.go
+++ b/runtime/runenv.go
@@ -1,10 +1,14 @@
 package runtime
 
 import (
+	"context"
+	"encoding/json"
 	"os"
+	"path/filepath"
 	"sync"
 	"time"
 
+	"github.com/avast/retry-go"
 	"github.com/hashicorp/go-multierror"
 	influxdb2 "github.com/influxdata/influxdb-client-go"
 	"go.uber.org/zap"
@@ -134,15 +138,26 @@ func (re *RunEnv) Close() error {
 		_ = l.SLogger().Sync()
 	}
 
-	close(re.closeCh)
-	re.wg.Wait()
-	err = multierror.Append(err, re.assetsErr)
-
+	// Flush the diagnostics InfluxDB writer.
 	if re.wapi != nil {
 		re.wapi.Flush()
 		re.wapi.Close()
 	}
 
+	// Next, we reopen the results.out file, and upload all points to InfluxDB
+	// using the blocking API.
+	results, err2 := os.OpenFile(filepath.Join(re.TestOutputsPath, "results.out"), os.O_RDONLY, 0666)
+	if err2 == nil {
+		err2 = re.batchInsertInfluxDB(results)
+	}
+	err = multierror.Append(err, err2)
+
+	// This close stops monitoring the wapi errors channel, and closes assets.
+	close(re.closeCh)
+	re.wg.Wait()
+	err = multierror.Append(err, re.assetsErr)
+
+	// Now we're ready to close InfluxDB.
 	if re.influxdb != nil {
 		re.influxdb.Close()
 	}
@@ -150,6 +165,59 @@ func (re *RunEnv) Close() error {
 	return err.ErrorOrNil()
 }
 
+// batchInsertInfluxDB uploads the Metrics in results to the InfluxDB "results"
+// bucket in batches of up to 500 points, and closes results when done.
+func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
+	defer results.Close()
+	if re.influxdb == nil {
+		return nil // InfluxDB was unavailable at startup; nowhere to upload to.
+	}
+	tags := map[string]string{
+		"plan":     re.TestPlan,
+		"case":     re.TestCase,
+		"run":      re.TestRun,
+		"group_id": re.TestGroupID,
+	}
+	wapib := re.influxdb.WriteApiBlocking("testground", "results")
+	logger := func(n uint, err error) {
+		re.RecordMessage("failed to upload result points on attempt %d to InfluxDB: %s", n, err)
+	}
+	// flush uploads one batch: 5 attempts, 1 second delay, default jitter, each failed attempt logged.
+	flush := func(points []*influxdb2.Point) {
+		write := func() error {
+			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+			defer cancel()
+			return wapib.WritePoint(ctx, points...)
+		}
+		if err := retry.Do(write, retry.Attempts(5), retry.Delay(1*time.Second), retry.OnRetry(logger)); err != nil {
+			re.RecordMessage("failed completely to upload a batch of result points to InfluxDB: %s", err)
+		}
+	}
+	var points []*influxdb2.Point
+	var decErr error
+	for dec := json.NewDecoder(results); decErr == nil && dec.More(); {
+		var m Metric
+		if err := dec.Decode(&m); err != nil {
+			re.RecordMessage("failed to decode Metric from results.out: %s", err)
+			// Only a type mismatch consumes the bad value; other decoder errors are sticky and would loop forever.
+			if _, ok := err.(*json.UnmarshalTypeError); !ok {
+				decErr = err
+			}
+			continue
+		}
+		p := influxdb2.NewPoint(m.Name, tags, m.Measures, time.Unix(0, m.Timestamp))
+		p.AddTag("type", m.Type.String())
+		if points = append(points, p); len(points) == 500 {
+			flush(points)
+			points = points[:0]
+		}
+	}
+	if len(points) > 0 {
+		flush(points) // upload the final, partial batch.
+	}
+	return decErr
+}
+
 // CurrentRunEnv populates a test context from environment vars.
 func CurrentRunEnv() *RunEnv {
 	re, _ := ParseRunEnv(os.Environ())

From 420290269be53cecaf9f5d779a8a9764b6de7038 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Tue, 28 Apr 2020 16:11:53 +0100
Subject: [PATCH 03/13] test utils for sdk-go.

---
 runtime/events.go           |  2 +-
 sync/barrier_test.go        | 23 ++++++++++++------
 sync/common_test.go         | 25 --------------------
 sync/gc_test.go             |  5 +++-
 sync/generic_client_test.go |  5 +++-
 sync/topic_test.go          | 34 ++++++++++++++++++---------
 test/runenv.go              | 47 +++++++++++++++++++++++++++++++++++++
 7 files changed, 95 insertions(+), 46 deletions(-)
 create mode 100644 test/runenv.go

diff --git a/runtime/events.go b/runtime/events.go
index f06bb5c..e87ab74 100644
--- a/runtime/events.go
+++ b/runtime/events.go
@@ -135,7 +135,7 @@ func (l *logger) RecordFailure(err error) {
 func (l *logger) RecordCrash(err interface{}) {
 	evt := Event{
 		Type:       EventTypeFinish,
-		Outcome:    EventOutcomeFailed,
+		Outcome:    EventOutcomeCrashed,
 		Error:      fmt.Sprintf("%s", err),
 		Stacktrace: string(debug.Stack()),
 	}
diff --git a/sync/barrier_test.go b/sync/barrier_test.go
index a0a7a07..dbaf6aa 100644
--- a/sync/barrier_test.go
+++ b/sync/barrier_test.go
@@ -6,13 +6,16 @@ import (
 	"time"
 
 	"golang.org/x/sync/errgroup"
+
+	"github.com/testground/sdk-go/test"
 )
 
 func TestBarrier(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -44,7 +47,8 @@ func TestBarrierBeyondTarget(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -71,7 +75,8 @@ func TestBarrierZero(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -97,7 +102,8 @@ func TestBarrierCancel(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -124,7 +130,8 @@ func TestBarrierDeadline(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -153,7 +160,8 @@ func TestSignalAndWait(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
@@ -175,7 +183,8 @@ func TestSignalAndWait(t *testing.T) {
 }
 
 func TestSignalAndWaitTimeout(t *testing.T) {
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(context.Background(), runenv)
 	if err != nil {
diff --git a/sync/common_test.go b/sync/common_test.go
index ff37274..f18d968 100644
--- a/sync/common_test.go
+++ b/sync/common_test.go
@@ -2,18 +2,14 @@ package sync
 
 import (
 	"context"
-	"crypto/sha1"
 	"fmt"
 	"math/rand"
-	"net"
 	"os"
 	"os/exec"
 	"testing"
 	"time"
 
 	"go.uber.org/zap"
-
-	"github.com/testground/sdk-go/runtime"
 )
 
 func TestMain(m *testing.M) {
@@ -72,24 +68,3 @@ func ensureRedis() (func() error, error) {
 		return nil
 	}, nil
 }
-
-// randomRunEnv generates a random RunEnv for testing purposes.
-func randomRunEnv() *runtime.RunEnv {
-	b := make([]byte, 32)
-	_, _ = rand.Read(b)
-
-	_, subnet, _ := net.ParseCIDR("127.1.0.1/16")
-
-	return runtime.NewRunEnv(runtime.RunParams{
-		TestPlan:           fmt.Sprintf("testplan-%d", rand.Uint32()),
-		TestSidecar:        false,
-		TestCase:           fmt.Sprintf("testcase-%d", rand.Uint32()),
-		TestRun:            fmt.Sprintf("testrun-%d", rand.Uint32()),
-		TestRepo:           "github.com/ipfs/go-ipfs",
-		TestSubnet:         &runtime.IPNet{IPNet: *subnet},
-		TestCommit:         fmt.Sprintf("%x", sha1.Sum(b)),
-		TestInstanceCount:  int(1 + (rand.Uint32() % 999)),
-		TestInstanceRole:   "",
-		TestInstanceParams: make(map[string]string),
-	})
-}
diff --git a/sync/gc_test.go b/sync/gc_test.go
index cb10e5c..e9b186b 100644
--- a/sync/gc_test.go
+++ b/sync/gc_test.go
@@ -7,6 +7,8 @@ import (
 	"fmt"
 	"testing"
 	"time"
+
+	"github.com/testground/sdk-go/test"
 )
 
 func TestGC(t *testing.T) {
@@ -15,7 +17,8 @@ func TestGC(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
 	if err != nil {
diff --git a/sync/generic_client_test.go b/sync/generic_client_test.go
index 5a2eff1..598e56e 100644
--- a/sync/generic_client_test.go
+++ b/sync/generic_client_test.go
@@ -7,6 +7,8 @@ import (
 	"time"
 
 	"go.uber.org/zap"
+
+	"github.com/testground/sdk-go/test"
 )
 
 // TestGenericClientRunEnv checks that states and payloads published by a bound
@@ -26,7 +28,8 @@ func TestGenericClientRunEnv(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
 
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+	t.Cleanup(cleanup)
 
 	bclient, err := NewBoundClient(ctx, runenv)
 	if err != nil {
diff --git a/sync/topic_test.go b/sync/topic_test.go
index f34c9ad..bb221ca 100644
--- a/sync/topic_test.go
+++ b/sync/topic_test.go
@@ -7,6 +7,8 @@ import (
 	"testing"
 
 	"golang.org/x/sync/errgroup"
+
+	"github.com/testground/sdk-go/test"
 )
 
 type TestPayload struct {
@@ -19,10 +21,12 @@ type TestPayload struct {
 
 func TestSubscribeAfterAllPublished(t *testing.T) {
 	var (
-		iterations = 1000
-		runenv     = randomRunEnv()
+		iterations      = 1000
+		runenv, cleanup = test.RandomRunEnv(t)
 	)
 
+	t.Cleanup(cleanup)
+
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
@@ -64,10 +68,12 @@ func TestSubscribeAfterAllPublished(t *testing.T) {
 
 func TestSubscribeFirstConcurrentWrites(t *testing.T) {
 	var (
-		iterations = 1000
-		runenv     = randomRunEnv()
+		iterations      = 1000
+		runenv, cleanup = test.RandomRunEnv(t)
 	)
 
+	t.Cleanup(cleanup)
+
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
@@ -118,11 +124,13 @@ func TestSubscribeFirstConcurrentWrites(t *testing.T) {
 
 func TestSubscriptionConcurrentPublishersSubscribers(t *testing.T) {
 	var (
-		topics     = 100
-		iterations = 100
-		runenv     = randomRunEnv()
+		topics          = 100
+		iterations      = 100
+		runenv, cleanup = test.RandomRunEnv(t)
 	)
 
+	t.Cleanup(cleanup)
+
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
@@ -169,7 +177,9 @@ func TestSubscriptionConcurrentPublishersSubscribers(t *testing.T) {
 }
 
 func TestSubscriptionValidation(t *testing.T) {
-	runenv := randomRunEnv()
+	runenv, cleanup := test.RandomRunEnv(t)
+
+	t.Cleanup(cleanup)
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -205,11 +215,13 @@ func TestSubscriptionValidation(t *testing.T) {
 
 func TestSequenceOnWrite(t *testing.T) {
 	var (
-		iterations = 1000
-		runenv     = randomRunEnv()
-		topic      = &Topic{name: "pandemic", typ: reflect.TypeOf("")}
+		iterations      = 1000
+		topic           = &Topic{name: "pandemic", typ: reflect.TypeOf("")}
+		runenv, cleanup = test.RandomRunEnv(t)
 	)
 
+	t.Cleanup(cleanup)
+
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
diff --git a/test/runenv.go b/test/runenv.go
new file mode 100644
index 0000000..75974ed
--- /dev/null
+++ b/test/runenv.go
@@ -0,0 +1,47 @@
+package test
+
+import (
+	"fmt"
+	"io/ioutil"
+	"math/rand"
+	"net"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/testground/sdk-go/runtime"
+)
+
+// RandomRunEnv generates a random RunEnv for testing purposes, backed by a
+// temporary outputs directory; the returned cleanup func removes that directory.
+func RandomRunEnv(t *testing.T) (re *runtime.RunEnv, cleanup func()) {
+	t.Helper()
+
+	_, subnet, err := net.ParseCIDR("127.1.0.1/16")
+	if err != nil {
+		t.Fatalf("failed to parse test subnet: %s", err)
+	}
+	odir, err := ioutil.TempDir("", "testground-tests-*")
+	if err != nil {
+		t.Fatalf("failed to create temp output dir: %s", err)
+	}
+
+	rp := runtime.RunParams{
+		TestPlan:               fmt.Sprintf("testplan-%d", rand.Uint32()),
+		TestSidecar:            false,
+		TestCase:               fmt.Sprintf("testcase-%d", rand.Uint32()),
+		TestRun:                fmt.Sprintf("testrun-%d", rand.Uint32()),
+		TestSubnet:             &runtime.IPNet{IPNet: *subnet},
+		TestInstanceCount:      int(1 + (rand.Uint32() % 999)),
+		TestInstanceRole:       "",
+		TestInstanceParams:     make(map[string]string),
+		TestGroupID:            fmt.Sprintf("group-%d", rand.Uint32()),
+		TestStartTime:          time.Now(),
+		TestGroupInstanceCount: int(1 + (rand.Uint32() % 999)),
+		TestOutputsPath:        odir,
+	}
+
+	return runtime.NewRunEnv(rp), func() {
+		_ = os.RemoveAll(odir) // best-effort: a leftover temp dir is harmless
+	}
+}

From fa3c2b4655894c63a5fbfdf868869b7318b5e8f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Tue, 28 Apr 2020 22:36:15 +0100
Subject: [PATCH 04/13] influxdb v2

---
 go.mod                                  |   2 +
 go.sum                                  |   2 +
 runtime/influxdb.go                     |   4 +
 runtime/metrics_api.go                  |   7 +-
 runtime/metrics_types.go                |  33 ++++-
 runtime/runenv.go                       |  73 ++++++-----
 runtime/{files.go => runenv_assets.go}  |   0
 runtime/{events.go => runenv_events.go} |  59 +++++++--
 runtime/{logger.go => runenv_logger.go} |  35 ++----
 runtime/runenv_test.go                  | 160 ++++++++++++++++++++++++
 runtime/sinks.go                        |  13 +-
 runtime/test_utils.go                   |  45 +++++++
 sync/barrier_test.go                    |  16 +--
 sync/gc_test.go                         |   4 +-
 sync/generic_client_test.go             |   4 +-
 sync/topic_test.go                      |  12 +-
 16 files changed, 363 insertions(+), 106 deletions(-)
 rename runtime/{files.go => runenv_assets.go} (100%)
 rename runtime/{events.go => runenv_events.go} (66%)
 rename runtime/{logger.go => runenv_logger.go} (57%)
 create mode 100644 runtime/test_utils.go

diff --git a/go.mod b/go.mod
index 9e83bdb..d8ecc41 100644
--- a/go.mod
+++ b/go.mod
@@ -8,8 +8,10 @@ require (
 	github.com/go-redis/redis/v7 v7.2.0
 	github.com/hashicorp/go-multierror v1.1.0
 	github.com/influxdata/influxdb-client-go v1.1.0
+	github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
 	github.com/prometheus/client_golang v1.5.1
 	github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
+	github.com/stretchr/testify v1.4.0
 	go.uber.org/zap v1.14.1
 	golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
 )
diff --git a/go.sum b/go.sum
index 508d916..7edd436 100644
--- a/go.sum
+++ b/go.sum
@@ -52,6 +52,8 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
 github.com/influxdata/influxdb-client-go v1.1.0 h1:ht1HvNAfBuwY9/H0i1tOiVH4vHpkZ3gFwYD5j/xvyFA=
 github.com/influxdata/influxdb-client-go v1.1.0/go.mod h1:ZVjaPW87aKp5hzyny2WVpWVF0UY+iqtPz9veOZ2T1zw=
+github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
+github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
 github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
 github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
diff --git a/runtime/influxdb.go b/runtime/influxdb.go
index 0771d4b..aa9e0a4 100644
--- a/runtime/influxdb.go
+++ b/runtime/influxdb.go
@@ -21,6 +21,9 @@ func NewInfluxDBClient() (influxdb2.InfluxDBClient, error) {
 	}
 
 	auth := os.Getenv(EnvInfluxDBAuthToken)
+	if auth == "" {
+		return nil, fmt.Errorf("no InfluxDB auth token in $%s env var", EnvInfluxDBAuthToken)
+	}
 
 	opts := influxdb2.DefaultOptions()
 	opts.SetMaxRetries(10)
@@ -34,5 +37,6 @@ func NewInfluxDBClient() (influxdb2.InfluxDBClient, error) {
 	if ok, err := client.Ready(ctx); err != nil || !ok {
 		return nil, fmt.Errorf("influxdb not ready: %w", err)
 	}
+
 	return client, nil
 }
diff --git a/runtime/metrics_api.go b/runtime/metrics_api.go
index fa4d0fd..3b29000 100644
--- a/runtime/metrics_api.go
+++ b/runtime/metrics_api.go
@@ -42,15 +42,14 @@ type MetricsApi struct {
 }
 
 type metricsApiOpts struct {
-	prefix string
-	freq   time.Duration
-	sinks  []SinkFn
+	freq  time.Duration
+	sinks []SinkFn
 }
 
 func newMetricsApi(re *RunEnv, opts metricsApiOpts) *MetricsApi {
 	m := &MetricsApi{
 		re:           re,
-		reg:          metrics.NewPrefixedRegistry(opts.prefix),
+		reg:          metrics.NewRegistry(),
 		sinks:        opts.sinks,
 		freq:         opts.freq,
 		freqChangeCh: make(chan time.Duration),
diff --git a/runtime/metrics_types.go b/runtime/metrics_types.go
index 2659cd6..c5c37a1 100644
--- a/runtime/metrics_types.go
+++ b/runtime/metrics_types.go
@@ -1,6 +1,8 @@
 package runtime
 
 import (
+	"encoding/json"
+	"fmt"
 	"sync"
 	"time"
 )
@@ -17,8 +19,29 @@ const (
 	MetricTimer
 )
 
+var typeMappings = [...]string{"point", "counter", "ewma", "gauge", "histogram", "meter", "timer"}
+
 func (mt MetricType) String() string {
-	return [...]string{"point", "counter", "ewma", "gauge", "histogram", "meter", "timer"}[mt]
+	return typeMappings[mt]
+}
+
+func (mt MetricType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(mt.String())
+}
+
+// UnmarshalJSON decodes a metric type from its JSON string name; it is only used for testing, so the linear scan is fine.
+func (mt *MetricType) UnmarshalJSON(b []byte) error {
+	var s string
+	if err := json.Unmarshal(b, &s); err != nil {
+		return err
+	}
+	for i, m := range typeMappings {
+		if m == s {
+			*mt = MetricType(i)
+			return nil
+		}
+	}
+	return fmt.Errorf("invalid metric type %q", s)
 }
 
 var pools = func() (p [7]sync.Pool) {
@@ -32,9 +55,9 @@ var pools = func() (p [7]sync.Pool) {
 
 type Metric struct {
 	Timestamp int64                  `json:"ts"`
-	Type      MetricType             `json:"t"`
-	Name      string                 `json:"n"`
-	Measures  map[string]interface{} `json:"m"`
+	Type      MetricType             `json:"type"`
+	Name      string                 `json:"name"`
+	Measures  map[string]interface{} `json:"measures"`
 }
 
 func (m *Metric) Release() {
@@ -52,7 +75,7 @@ func NewMetric(name string, i interface{}) *Metric {
 	case Point:
 		t = MetricPoint
 		m = pools[t].Get().(*Metric)
-		m.Measures["value"] = v
+		m.Measures["value"] = float64(v)
 
 	case Counter:
 		t = MetricCounter
diff --git a/runtime/runenv.go b/runtime/runenv.go
index c4f9dea..38f1e92 100644
--- a/runtime/runenv.go
+++ b/runtime/runenv.go
@@ -17,12 +17,14 @@ import (
 // RunEnv encapsulates the context for this test run.
 type RunEnv struct {
 	RunParams
-	*logger
+
+	logger *zap.Logger
 
 	diagnostics *MetricsApi
 	results     *MetricsApi
 	influxdb    influxdb2.InfluxDBClient
 	wapi        influxdb2.WriteApi
+	tags        map[string]string
 
 	wg        sync.WaitGroup
 	closeCh   chan struct{}
@@ -38,12 +40,17 @@ type RunEnv struct {
 	}
 }
 
+func (re *RunEnv) SLogger() *zap.SugaredLogger {
+	return re.logger.Sugar()
+}
+
 // NewRunEnv constructs a runtime environment from the given runtime parameters.
 func NewRunEnv(params RunParams) *RunEnv {
 	re := &RunEnv{
 		RunParams: params,
-		logger:    newLogger(&params),
+		closeCh:   make(chan struct{}),
 	}
+	re.initLogger()
 
 	re.structured.ch = make(chan *zap.Logger)
 	re.unstructured.ch = make(chan *os.File)
@@ -54,26 +61,31 @@ func NewRunEnv(params RunParams) *RunEnv {
 	var dsinks = []SinkFn{LogSinkJSON(re, "diagnostics.out")}
 	client, err := NewInfluxDBClient()
 	if err == nil {
+		re.tags = map[string]string{
+			"plan":     re.TestPlan,
+			"case":     re.TestCase,
+			"run":      re.TestRun,
+			"group_id": re.TestGroupID,
+		}
+
 		re.influxdb = client
-		wapi := client.WriteApi("testground", "diagnostics")
-		dsinks = append(dsinks, WriteToInfluxDB(re, wapi))
+		re.wapi = client.WriteApi("testground", "diagnostics")
+		dsinks = append(dsinks, WriteToInfluxDB(re))
 
 		re.wg.Add(1)
 		go re.monitorInfluxDBErrors()
 	} else {
-		re.logger.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
+		re.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
 	}
 
 	re.diagnostics = newMetricsApi(re, metricsApiOpts{
-		prefix: "diag.",
-		freq:   1 * time.Second,
-		sinks:  dsinks,
+		freq:  1 * time.Second,
+		sinks: dsinks,
 	})
 
 	re.results = newMetricsApi(re, metricsApiOpts{
-		prefix: "results.",
-		freq:   1 * time.Second,
-		sinks:  []SinkFn{LogSinkJSON(re, "results.out")},
+		freq:  1 * time.Second,
+		sinks: []SinkFn{LogSinkJSON(re, "results.out")},
 	})
 
 	return re
@@ -131,27 +143,31 @@ func (re *RunEnv) manageAssets() {
 
 func (re *RunEnv) Close() error {
 	var err *multierror.Error
+
+	// close diagnostics; this stops the ticker and any further observations on
+	// runenv.D() will fail/panic.
 	err = multierror.Append(re.diagnostics.Close())
+
+	// close results; no more results via runenv.R() can be recorded.
 	err = multierror.Append(re.results.Close())
 
-	if l := re.logger; l != nil {
-		_ = l.SLogger().Sync()
+	if re.influxdb != nil {
+		// Next, we reopen the results.out file, and upload all points to InfluxDB
+		// using the blocking API.
+		results, err2 := os.OpenFile(filepath.Join(re.TestOutputsPath, "results.out"), os.O_RDONLY, 0666)
+		if err2 == nil {
+			// batchInsertInfluxDB will record errors via runenv.RecordMessage().
+			err2 = re.batchInsertInfluxDB(results)
+		}
+		err = multierror.Append(err, err2)
 	}
 
-	// Flush the diagnostics InfluxDB writer.
+	// Flush the immediate InfluxDB writer.
 	if re.wapi != nil {
 		re.wapi.Flush()
 		re.wapi.Close()
 	}
 
-	// Next, we reopen the results.out file, and upload all points to InfluxDB
-	// using the blocking API.
-	results, err2 := os.OpenFile(filepath.Join(re.TestOutputsPath, "results.out"), os.O_RDONLY, 0666)
-	if err2 == nil {
-		err2 = re.batchInsertInfluxDB(results)
-	}
-	err = multierror.Append(err, err2)
-
 	// This close stops monitoring the wapi errors channel, and closes assets.
 	close(re.closeCh)
 	re.wg.Wait()
@@ -162,17 +178,14 @@ func (re *RunEnv) Close() error {
 		re.influxdb.Close()
 	}
 
+	if l := re.logger; l != nil {
+		_ = l.Sync()
+	}
+
 	return err.ErrorOrNil()
 }
 
 func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
-	tags := map[string]string{
-		"plan":     re.TestPlan,
-		"case":     re.TestCase,
-		"run":      re.TestRun,
-		"group_id": re.TestGroupID,
-	}
-
 	var (
 		count  int
 		points []*influxdb2.Point
@@ -187,7 +200,7 @@ func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
 		}
 
 		// NewPoint copies all tags and fields, so this is thread-safe.
-		p := influxdb2.NewPoint(m.Name, tags, m.Measures, time.Unix(0, m.Timestamp))
+		p := influxdb2.NewPoint(m.Name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
 		p.AddTag("type", m.Type.String())
 		points = append(points, p)
 		count++
diff --git a/runtime/files.go b/runtime/runenv_assets.go
similarity index 100%
rename from runtime/files.go
rename to runtime/runenv_assets.go
diff --git a/runtime/events.go b/runtime/runenv_events.go
similarity index 66%
rename from runtime/events.go
rename to runtime/runenv_events.go
index e87ab74..4de67c1 100644
--- a/runtime/events.go
+++ b/runtime/runenv_events.go
@@ -3,7 +3,9 @@ package runtime
 import (
 	"fmt"
 	"runtime/debug"
+	"time"
 
+	influxdb2 "github.com/influxdata/influxdb-client-go"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
 )
@@ -59,6 +61,7 @@ func (e Event) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 func (rp *RunParams) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 	oe.AddString("plan", rp.TestPlan)
 	oe.AddString("case", rp.TestCase)
+	oe.AddString("run", rp.TestRun)
 	if err := oe.AddReflected("params", rp.TestInstanceParams); err != nil {
 		return err
 	}
@@ -68,7 +71,7 @@ func (rp *RunParams) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 		if rp.TestSubnet == nil {
 			return ""
 		}
-		return rp.TestSubnet.Network()
+		return rp.TestSubnet.String()
 	}())
 
 	oe.AddString("group", rp.TestGroupID)
@@ -90,7 +93,7 @@ func (rp *RunParams) MarshalLogObject(oe zapcore.ObjectEncoder) error {
 }
 
 // RecordMessage records an informational message.
-func (l *logger) RecordMessage(msg string, a ...interface{}) {
+func (re *RunEnv) RecordMessage(msg string, a ...interface{}) {
 	if len(a) > 0 {
 		msg = fmt.Sprintf(msg, a...)
 	}
@@ -98,46 +101,78 @@ func (l *logger) RecordMessage(msg string, a ...interface{}) {
 		Type:    EventTypeMessage,
 		Message: msg,
 	}
-	l.logger.Info("", zap.Object("event", evt))
+	re.logger.Info("", zap.Object("event", evt))
 }
 
-func (l *logger) RecordStart() {
+func (re *RunEnv) RecordStart() {
 	evt := Event{
 		Type:   EventTypeStart,
-		Runenv: l.runenv,
+		Runenv: &re.RunParams,
 	}
 
-	l.logger.Info("", zap.Object("event", evt))
+	re.logger.Info("", zap.Object("event", evt))
+	if re.wapi != nil {
+		f := map[string]interface{}{
+			"type": "instance.start",
+		}
+		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
+		re.wapi.WritePoint(p)
+	}
 }
 
 // RecordSuccess records that the calling instance succeeded.
-func (l *logger) RecordSuccess() {
+func (re *RunEnv) RecordSuccess() {
 	evt := Event{
 		Type:    EventTypeFinish,
 		Outcome: EventOutcomeOK,
 	}
-	l.logger.Info("", zap.Object("event", evt))
+	re.logger.Info("", zap.Object("event", evt))
+	if re.wapi != nil {
+		f := map[string]interface{}{
+			"type": "instance.success",
+		}
+		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
+		re.wapi.WritePoint(p)
+	}
 }
 
 // RecordFailure records that the calling instance failed with the supplied
 // error.
-func (l *logger) RecordFailure(err error) {
+func (re *RunEnv) RecordFailure(err error) {
 	evt := Event{
 		Type:    EventTypeFinish,
 		Outcome: EventOutcomeFailed,
 		Error:   err.Error(),
 	}
-	l.logger.Info("", zap.Object("event", evt))
+	re.logger.Info("", zap.Object("event", evt))
+	if re.wapi != nil {
+		f := map[string]interface{}{
+			"type":    "instance.finish",
+			"outcome": "failed",
+			"error":   err.Error(),
+		}
+		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
+		re.wapi.WritePoint(p)
+	}
 }
 
 // RecordCrash records that the calling instance crashed/panicked with the
 // supplied error.
-func (l *logger) RecordCrash(err interface{}) {
+func (re *RunEnv) RecordCrash(err interface{}) {
 	evt := Event{
 		Type:       EventTypeFinish,
 		Outcome:    EventOutcomeCrashed,
 		Error:      fmt.Sprintf("%s", err),
 		Stacktrace: string(debug.Stack()),
 	}
-	l.logger.Error("", zap.Object("event", evt))
+	re.logger.Error("", zap.Object("event", evt))
+	if re.wapi != nil {
+		f := map[string]interface{}{
+			"type":    "instance.finish",
+			"outcome": "crash",
+			"error":   fmt.Sprintf("%s", err),
+		}
+		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
+		re.wapi.WritePoint(p)
+	}
 }
diff --git a/runtime/logger.go b/runtime/runenv_logger.go
similarity index 57%
rename from runtime/logger.go
rename to runtime/runenv_logger.go
index 8c7668e..a060b43 100644
--- a/runtime/logger.go
+++ b/runtime/runenv_logger.go
@@ -8,28 +8,15 @@ import (
 	"go.uber.org/zap/zapcore"
 )
 
-type logger struct {
-	runenv *RunParams
-
-	logger  *zap.Logger
-	slogger *zap.SugaredLogger
-}
-
-func newLogger(runenv *RunParams) *logger {
-	l := &logger{runenv: runenv}
-	l.init()
-	return l
-}
-
-func (l *logger) init() {
+func (re *RunEnv) initLogger() {
 	level := zap.NewAtomicLevel()
 
 	if lvl := os.Getenv("LOG_LEVEL"); lvl != "" {
 		if err := level.UnmarshalText([]byte(lvl)); err != nil {
 			defer func() {
 				// once the logger is defined...
-				if l.slogger != nil {
-					l.slogger.Errorf("failed to decode log level '%q': %s", l, err)
+				if re.logger != nil {
+					re.logger.Sugar().Errorf("failed to decode log level '%q': %s", lvl, err)
 				}
 			}()
 		}
@@ -38,8 +25,8 @@ func (l *logger) init() {
 	}
 
 	paths := []string{"stdout"}
-	if l.runenv.TestOutputsPath != "" {
-		paths = append(paths, filepath.Join(l.runenv.TestOutputsPath, "run.out"))
+	if re.TestOutputsPath != "" {
+		paths = append(paths, filepath.Join(re.TestOutputsPath, "run.out"))
 	}
 
 	cfg := zap.Config{
@@ -50,8 +37,8 @@ func (l *logger) init() {
 		OutputPaths:       paths,
 		Encoding:          "json",
 		InitialFields: map[string]interface{}{
-			"run_id":   l.runenv.TestRun,
-			"group_id": l.runenv.TestGroupID,
+			"run_id":   re.TestRun,
+			"group_id": re.TestGroupID,
 		},
 	}
 
@@ -61,14 +48,8 @@ func (l *logger) init() {
 	cfg.EncoderConfig = enc
 
 	var err error
-	l.logger, err = cfg.Build()
+	re.logger, err = cfg.Build()
 	if err != nil {
 		panic(err)
 	}
-
-	l.slogger = l.logger.Sugar()
-}
-
-func (l *logger) SLogger() *zap.SugaredLogger {
-	return l.slogger
 }
diff --git a/runtime/runenv_test.go b/runtime/runenv_test.go
index b7053a2..45bd6ba 100644
--- a/runtime/runenv_test.go
+++ b/runtime/runenv_test.go
@@ -1,10 +1,23 @@
 package runtime
 
 import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
 	"reflect"
+	"strings"
 	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
 )
 
+func init() {
+	_ = os.Setenv("INFLUXDB_URL", "http://localhost:9999")
+}
+
 func TestParseKeyValues(t *testing.T) {
 	type args struct {
 		in []string
@@ -67,3 +80,150 @@ func TestParseKeyValues(t *testing.T) {
 		})
 	}
 }
+
+func TestAllEvents(t *testing.T) {
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	re.RecordStart()
+	re.RecordFailure(fmt.Errorf("bang"))
+	re.RecordCrash(fmt.Errorf("terrible bang"))
+	re.RecordMessage("i have something to %s", "say")
+	re.RecordSuccess()
+
+	if err := re.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	file, err := os.OpenFile(re.TestOutputsPath+"/run.out", os.O_RDONLY, 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer file.Close()
+
+	require := require.New(t)
+
+	var i int
+	for dec := json.NewDecoder(file); dec.More(); {
+		var m = struct {
+			Event Event `json:"event"`
+		}{}
+		if err := dec.Decode(&m); err != nil {
+			t.Fatal(err)
+		}
+
+		switch evt := m.Event; i {
+		case 0:
+			require.Equal(EventTypeMessage, evt.Type)
+			require.Condition(func() bool { return strings.HasPrefix(evt.Message, "InfluxDB unavailable") })
+		case 1:
+			require.Equal(EventTypeStart, evt.Type)
+			require.Equal(evt.Runenv.TestPlan, re.TestPlan)
+			require.Equal(evt.Runenv.TestCase, re.TestCase)
+			require.Equal(evt.Runenv.TestRun, re.TestRun)
+			require.Equal(evt.Runenv.TestGroupID, re.TestGroupID)
+		case 2:
+			require.Equal(EventTypeFinish, evt.Type)
+			require.Equal(EventOutcomeFailed, evt.Outcome)
+			require.Equal("bang", evt.Error)
+		case 3:
+			require.Equal(EventTypeFinish, evt.Type)
+			require.Equal(EventOutcomeCrashed, evt.Outcome)
+			require.Equal("terrible bang", evt.Error)
+			require.NotEmpty(evt.Stacktrace)
+		case 4:
+			require.Equal(EventTypeMessage, evt.Type)
+		case 5:
+			require.Equal(evt.Type, EventTypeFinish)
+			require.Equal(evt.Outcome, EventOutcomeOK)
+		}
+		i++
+	}
+}
+
+func TestDiagnosticsMetricsRecorded(t *testing.T) {
+	test := func(f func(*RunEnv) *MetricsApi, file string) func(t *testing.T) {
+		return func(t *testing.T) {
+			re, cleanup := RandomTestRunEnv(t)
+			t.Cleanup(cleanup)
+
+			api := f(re)
+
+			names := []string{"point1", "point2", "counter1", "meter1", "timer1"}
+			types := []string{"point", "counter", "meter", "timer"}
+			api.SetFrequency(200 * time.Millisecond)
+			api.RecordPoint("point1", 123)
+			api.RecordPoint("point2", 123)
+			api.NewCounter("counter1").Inc(50)
+			api.NewMeter("meter1").Mark(50)
+			api.NewTimer("timer1").Update(5 * time.Second)
+
+			time.Sleep(1 * time.Second)
+
+			_ = re.Close()
+
+			file, err := os.OpenFile(filepath.Join(re.TestOutputsPath, file), os.O_RDONLY, 0644)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer file.Close()
+
+			var metrics []*Metric
+			for dec := json.NewDecoder(file); dec.More(); {
+				var m *Metric
+				if err := dec.Decode(&m); err != nil {
+					t.Fatal(err)
+				}
+				metrics = append(metrics, m)
+			}
+
+			require := require.New(t)
+
+			na := make(map[string]struct{})
+			ty := make(map[string]struct{})
+			for _, m := range metrics {
+				require.Greater(m.Timestamp, int64(0))
+				na[m.Name] = struct{}{}
+				ty[m.Type.String()] = struct{}{}
+				require.NotZero(len(m.Measures))
+			}
+
+			namesActual := make([]string, 0, len(na))
+			for k := range na {
+				namesActual = append(namesActual, k)
+			}
+
+			typesActual := make([]string, 0, len(ty))
+			for k := range ty {
+				typesActual = append(typesActual, k)
+			}
+
+			require.ElementsMatch(names, namesActual)
+			require.ElementsMatch(types, typesActual)
+		}
+	}
+
+	t.Run("diagnostics", test((*RunEnv).D, "diagnostics.out"))
+	t.Run("results", test((*RunEnv).R, "results.out"))
+}
+
+func TestDiagnosticsDispatchedToInfluxDB(t *testing.T) {
+	skipIfNoLocalInfluxDB(t)
+
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	re.D().RecordPoint("foo", 1234)
+
+	_ = re.Close()
+}
+
+// skipIfNoLocalInfluxDB skips t when no InfluxDB client can be created; otherwise it attempts onboarding and logs the outcome.
+func skipIfNoLocalInfluxDB(t *testing.T) {
+	client, err := NewInfluxDBClient()
+	if err != nil {
+		t.Skipf("no local InfluxDB available: %s", err)
+	}
+	defer client.Close() // close only once Setup is done with the client
+	t.Log(client.Setup(context.Background(), "foo", "foo", "testground", "foo", 0))
+}
diff --git a/runtime/sinks.go b/runtime/sinks.go
index 1d9fefd..c4b54c6 100644
--- a/runtime/sinks.go
+++ b/runtime/sinks.go
@@ -19,19 +19,12 @@ func LogSinkJSON(re *RunEnv, filename string) SinkFn {
 	}
 }
 
-func WriteToInfluxDB(re *RunEnv, w influxdb2.WriteApi) SinkFn {
-	tags := map[string]string{
-		"plan":     re.TestPlan,
-		"case":     re.TestCase,
-		"run":      re.TestRun,
-		"group_id": re.TestGroupID,
-	}
-
+func WriteToInfluxDB(re *RunEnv) SinkFn {
 	return func(m *Metric) error {
 		// NewPoint copies all tags and fields, so this is thread-safe.
-		p := influxdb2.NewPoint(m.Name, tags, m.Measures, time.Unix(0, m.Timestamp))
+		p := influxdb2.NewPoint(m.Name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
 		p.AddTag("type", m.Type.String())
-		w.WritePoint(p)
+		re.wapi.WritePoint(p)
 		return nil
 	}
 }
diff --git a/runtime/test_utils.go b/runtime/test_utils.go
new file mode 100644
index 0000000..f136d21
--- /dev/null
+++ b/runtime/test_utils.go
@@ -0,0 +1,45 @@
+package runtime
+
+import (
+	"fmt"
+	"io/ioutil"
+	"math/rand"
+	"net"
+	"os"
+	"testing"
+	"time"
+)
+
+// RandomTestRunEnv generates a random RunEnv for testing purposes, backed by a
+// temporary outputs directory; the returned cleanup func removes that directory.
+func RandomTestRunEnv(t *testing.T) (re *RunEnv, cleanup func()) {
+	t.Helper()
+
+	_, subnet, err := net.ParseCIDR("127.1.0.1/16")
+	if err != nil {
+		t.Fatalf("failed to parse test subnet: %s", err)
+	}
+	odir, err := ioutil.TempDir("", "testground-tests-*")
+	if err != nil {
+		t.Fatalf("failed to create temp output dir: %s", err)
+	}
+
+	rp := RunParams{
+		TestPlan:               fmt.Sprintf("testplan-%d", rand.Uint32()),
+		TestSidecar:            false,
+		TestCase:               fmt.Sprintf("testcase-%d", rand.Uint32()),
+		TestRun:                fmt.Sprintf("testrun-%d", rand.Uint32()),
+		TestSubnet:             &IPNet{IPNet: *subnet},
+		TestInstanceCount:      int(1 + (rand.Uint32() % 999)),
+		TestInstanceRole:       "",
+		TestInstanceParams:     make(map[string]string),
+		TestGroupID:            fmt.Sprintf("group-%d", rand.Uint32()),
+		TestStartTime:          time.Now(),
+		TestGroupInstanceCount: int(1 + (rand.Uint32() % 999)),
+		TestOutputsPath:        odir,
+	}
+
+	return NewRunEnv(rp), func() {
+		_ = os.RemoveAll(odir) // best-effort: a leftover temp dir is harmless
+	}
+}
diff --git a/sync/barrier_test.go b/sync/barrier_test.go
index dbaf6aa..20ac4e4 100644
--- a/sync/barrier_test.go
+++ b/sync/barrier_test.go
@@ -7,14 +7,14 @@ import (
 
 	"golang.org/x/sync/errgroup"
 
-	"github.com/testground/sdk-go/test"
+	"github.com/testground/sdk-go/runtime"
 )
 
 func TestBarrier(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -47,7 +47,7 @@ func TestBarrierBeyondTarget(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -75,7 +75,7 @@ func TestBarrierZero(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -102,7 +102,7 @@ func TestBarrierCancel(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -130,7 +130,7 @@ func TestBarrierDeadline(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -160,7 +160,7 @@ func TestSignalAndWait(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
@@ -183,7 +183,7 @@ func TestSignalAndWait(t *testing.T) {
 }
 
 func TestSignalAndWaitTimeout(t *testing.T) {
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(context.Background(), runenv)
diff --git a/sync/gc_test.go b/sync/gc_test.go
index e9b186b..b9b1c36 100644
--- a/sync/gc_test.go
+++ b/sync/gc_test.go
@@ -8,7 +8,7 @@ import (
 	"testing"
 	"time"
 
-	"github.com/testground/sdk-go/test"
+	"github.com/testground/sdk-go/runtime"
 )
 
 func TestGC(t *testing.T) {
@@ -17,7 +17,7 @@ func TestGC(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	client, err := NewBoundClient(ctx, runenv)
diff --git a/sync/generic_client_test.go b/sync/generic_client_test.go
index 598e56e..232df6e 100644
--- a/sync/generic_client_test.go
+++ b/sync/generic_client_test.go
@@ -8,7 +8,7 @@ import (
 
 	"go.uber.org/zap"
 
-	"github.com/testground/sdk-go/test"
+	"github.com/testground/sdk-go/runtime"
 )
 
 // TestGenericClientRunEnv checks that states and payloads published by a bound
@@ -28,7 +28,7 @@ func TestGenericClientRunEnv(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
 
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	bclient, err := NewBoundClient(ctx, runenv)
diff --git a/sync/topic_test.go b/sync/topic_test.go
index bb221ca..1bbe26f 100644
--- a/sync/topic_test.go
+++ b/sync/topic_test.go
@@ -8,7 +8,7 @@ import (
 
 	"golang.org/x/sync/errgroup"
 
-	"github.com/testground/sdk-go/test"
+	"github.com/testground/sdk-go/runtime"
 )
 
 type TestPayload struct {
@@ -22,7 +22,7 @@ type TestPayload struct {
 func TestSubscribeAfterAllPublished(t *testing.T) {
 	var (
 		iterations      = 1000
-		runenv, cleanup = test.RandomRunEnv(t)
+		runenv, cleanup = runtime.RandomTestRunEnv(t)
 	)
 
 	t.Cleanup(cleanup)
@@ -69,7 +69,7 @@ func TestSubscribeAfterAllPublished(t *testing.T) {
 func TestSubscribeFirstConcurrentWrites(t *testing.T) {
 	var (
 		iterations      = 1000
-		runenv, cleanup = test.RandomRunEnv(t)
+		runenv, cleanup = runtime.RandomTestRunEnv(t)
 	)
 
 	t.Cleanup(cleanup)
@@ -126,7 +126,7 @@ func TestSubscriptionConcurrentPublishersSubscribers(t *testing.T) {
 	var (
 		topics          = 100
 		iterations      = 100
-		runenv, cleanup = test.RandomRunEnv(t)
+		runenv, cleanup = runtime.RandomTestRunEnv(t)
 	)
 
 	t.Cleanup(cleanup)
@@ -177,7 +177,7 @@ func TestSubscriptionConcurrentPublishersSubscribers(t *testing.T) {
 }
 
 func TestSubscriptionValidation(t *testing.T) {
-	runenv, cleanup := test.RandomRunEnv(t)
+	runenv, cleanup := runtime.RandomTestRunEnv(t)
 
 	t.Cleanup(cleanup)
 
@@ -217,7 +217,7 @@ func TestSequenceOnWrite(t *testing.T) {
 	var (
 		iterations      = 1000
 		topic           = &Topic{name: "pandemic", typ: reflect.TypeOf("")}
-		runenv, cleanup = test.RandomRunEnv(t)
+		runenv, cleanup = runtime.RandomTestRunEnv(t)
 	)
 
 	t.Cleanup(cleanup)

From 1ac1cecf7847a76d8ed45a5d56443e60b1f8a9b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Wed, 29 Apr 2020 15:05:56 +0100
Subject: [PATCH 05/13] switch to influxdb v1; implement batching; tests.

---
 runtime/influxdb.go             |  42 --------
 runtime/influxdb_batch.go       | 175 ++++++++++++++++++++++++++++++++
 runtime/influxdb_batch_test.go  | 151 +++++++++++++++++++++++++++
 runtime/influxdb_client.go      |  50 +++++++++
 runtime/influxdb_client_test.go |  55 ++++++++++
 runtime/metrics_api.go          |   8 +-
 runtime/metrics_sinks.go        |  31 ++++++
 runtime/runenv.go               | 110 ++++++++------------
 runtime/runenv_events.go        |  65 ++++++------
 runtime/runenv_test.go          |  70 +++++++++++--
 runtime/sinks.go                |  30 ------
 test/runenv.go                  |  47 ---------
 12 files changed, 600 insertions(+), 234 deletions(-)
 delete mode 100644 runtime/influxdb.go
 create mode 100644 runtime/influxdb_batch.go
 create mode 100644 runtime/influxdb_batch_test.go
 create mode 100644 runtime/influxdb_client.go
 create mode 100644 runtime/influxdb_client_test.go
 create mode 100644 runtime/metrics_sinks.go
 delete mode 100644 runtime/sinks.go
 delete mode 100644 test/runenv.go

diff --git a/runtime/influxdb.go b/runtime/influxdb.go
deleted file mode 100644
index aa9e0a4..0000000
--- a/runtime/influxdb.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package runtime
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"time"
-
-	influxdb2 "github.com/influxdata/influxdb-client-go"
-)
-
-const (
-	EnvInfluxDBURL       = "INFLUXDB_URL"
-	EnvInfluxDBAuthToken = "INFLUXDB_AUTH"
-)
-
-func NewInfluxDBClient() (influxdb2.InfluxDBClient, error) {
-	url := os.Getenv(EnvInfluxDBURL)
-	if url == "" {
-		return nil, fmt.Errorf("no InfluxDB URL in $%s env var", EnvInfluxDBURL)
-	}
-
-	auth := os.Getenv(EnvInfluxDBAuthToken)
-	if auth == "" {
-		return nil, fmt.Errorf("no InfluxDB auth token in $%s env var", EnvInfluxDBAuthToken)
-	}
-
-	opts := influxdb2.DefaultOptions()
-	opts.SetMaxRetries(10)
-	opts.SetHttpRequestTimeout(30)
-	opts.SetUseGZip(true)
-
-	client := influxdb2.NewClientWithOptions(url, auth, opts)
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	if ok, err := client.Ready(ctx); err != nil || !ok {
-		return nil, fmt.Errorf("influxdb not ready: %w", err)
-	}
-
-	return client, nil
-}
diff --git a/runtime/influxdb_batch.go b/runtime/influxdb_batch.go
new file mode 100644
index 0000000..57dd746
--- /dev/null
+++ b/runtime/influxdb_batch.go
@@ -0,0 +1,211 @@
+package runtime
+
+import (
+	"io"
+	"time"
+
+	"github.com/avast/retry-go"
+	_ "github.com/influxdata/influxdb1-client"
+	client "github.com/influxdata/influxdb1-client/v2"
+)
+
+// Batcher accepts InfluxDB points for delivery. Close shuts the Batcher down;
+// the batching implementation flushes any points still queued at that time.
+type Batcher interface {
+	io.Closer
+
+	WritePoint(p *client.Point)
+}
+
+// batcher queues points and sends them to InfluxDB in batches. A flush is
+// requested when at least `length` points are pending and on every tick of
+// `interval`; only one batch is in flight at a time. pending and sending are
+// managed by the background goroutine; send only reads sending.
+type batcher struct {
+	re        *RunEnv
+	client    client.Client
+	length    int
+	interval  time.Duration
+	retryOpts []retry.Option
+
+	writeCh chan *client.Point
+	flushCh chan struct{}
+	doneCh  chan struct{}
+
+	pending []*client.Point
+	sending []*client.Point
+	sendRes chan error
+	doneErr chan error
+}
+
+// newBatcher creates a batcher that writes through cli and starts its
+// background goroutine, which runs until Close is called. The retry options
+// are applied to every batch write.
+func newBatcher(re *RunEnv, cli client.Client, length int, interval time.Duration, retry ...retry.Option) *batcher {
+	b := &batcher{
+		re:        re,
+		client:    cli,
+		length:    length,
+		interval:  interval,
+		retryOpts: retry,
+
+		writeCh: make(chan *client.Point),
+		flushCh: make(chan struct{}, 1),
+		sendRes: make(chan error, 1),
+		doneCh:  make(chan struct{}),
+		doneErr: make(chan error),
+
+		pending: nil,
+		sending: nil,
+	}
+
+	go b.background()
+
+	return b
+}
+
+// background is the batcher's event loop. It queues incoming points, starts
+// at most one send at a time, and on shutdown waits for any in-flight send
+// before sending everything that remains as a single batch.
+func (b *batcher) background() {
+	tick := time.NewTicker(b.interval)
+	defer tick.Stop()
+
+	attemptFlush := func() {
+		if b.sending != nil {
+			// there's already a flush taking place.
+			return
+		}
+		select {
+		case b.flushCh <- struct{}{}:
+		default:
+			// there's a flush queued to be accepted.
+		}
+	}
+
+	for {
+		select {
+		case p := <-b.writeCh:
+			b.pending = append(b.pending, p)
+			if len(b.pending) >= b.length {
+				attemptFlush()
+			}
+
+		case err := <-b.sendRes:
+			if err == nil {
+				b.pending = b.pending[len(b.sending):]
+			}
+			b.sending = nil
+			if len(b.pending) >= b.length {
+				attemptFlush()
+			}
+
+		case <-tick.C:
+			attemptFlush()
+
+		case <-b.flushCh:
+			if b.sending != nil {
+				continue
+			}
+			l := len(b.pending)
+			if l == 0 {
+				continue
+			}
+			if l > b.length {
+				l = b.length
+			}
+			b.sending = b.pending[:l]
+			go b.send()
+
+		case <-b.doneCh:
+			if b.sending != nil {
+				// we are currently sending, wait for the send to finish first.
+				if err := <-b.sendRes; err == nil {
+					b.pending = b.pending[len(b.sending):]
+				}
+			}
+
+			var err error
+			if len(b.pending) > 0 {
+				// send all remaining data at once.
+				b.sending = b.pending
+				go b.send()
+				err = <-b.sendRes
+			}
+			b.doneErr <- err
+			return
+		}
+	}
+}
+
+// WritePoint queues p for delivery. Nil points are ignored, and points
+// written after Close are dropped rather than blocking forever on a
+// background loop that is no longer receiving.
+func (b *batcher) WritePoint(p *client.Point) {
+	if p == nil {
+		return
+	}
+	select {
+	case b.writeCh <- p:
+	case <-b.doneCh:
+	}
+}
+
+// Close flushes any remaining points and returns any errors from the final flush.
+// Calling Close again after it has returned is a no-op that returns nil.
+func (b *batcher) Close() error {
+	select {
+	case _, ok := <-b.doneCh:
+		if !ok {
+			return nil
+		}
+	default:
+	}
+	close(b.doneCh)
+	return <-b.doneErr
+}
+
+// send writes the points in b.sending as one batch, retrying according to
+// b.retryOpts, and reports the outcome on b.sendRes.
+func (b *batcher) send() {
+	points, err := client.NewBatchPoints(client.BatchPointsConfig{Database: "testground"})
+	if err != nil {
+		b.sendRes <- err
+		return
+	}
+
+	for _, p := range b.sending {
+		points.AddPoint(p)
+	}
+
+	err = retry.Do(func() error { return b.client.Write(points) }, b.retryOpts...)
+	b.sendRes <- err
+}
+
+// nilBatcher is a Batcher that does not batch: every point is written to the
+// embedded client immediately, in a batch of its own.
+type nilBatcher struct {
+	client.Client
+}
+
+// WritePoint writes p synchronously. Nil points are ignored. Errors are
+// discarded because the Batcher interface offers no way to report them.
+func (n *nilBatcher) WritePoint(p *client.Point) {
+	if p == nil {
+		return
+	}
+	bp, err := client.NewBatchPoints(client.BatchPointsConfig{Database: "testground"})
+	if err != nil {
+		return
+	}
+	bp.AddPoint(p)
+	_ = n.Write(bp)
+}
+
+// Close does nothing and returns nil. It shadows the embedded client's
+// Close, so closing a nilBatcher does not close the client.
+func (n *nilBatcher) Close() error {
+	return nil
+}
+
+var _ Batcher = (*nilBatcher)(nil)
diff --git a/runtime/influxdb_batch_test.go b/runtime/influxdb_batch_test.go
new file mode 100644
index 0000000..99acbce
--- /dev/null
+++ b/runtime/influxdb_batch_test.go
@@ -0,0 +1,162 @@
+package runtime
+
+import (
+	"testing"
+	"time"
+
+	"github.com/avast/retry-go"
+	client "github.com/influxdata/influxdb1-client/v2"
+	"github.com/stretchr/testify/require"
+)
+
+// TestLengthBatching checks that a batch is sent each time the configured
+// number of points has accumulated, and that Close flushes the remainder.
+func TestLengthBatching(t *testing.T) {
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	tc := &testClient{}
+	b := newBatcher(re, tc, 16, 24*time.Hour)
+
+	writePoints(t, b, 0, 36)
+
+	time.Sleep(1 * time.Second)
+
+	require := require.New(t)
+
+	// we should've received two batches.
+	tc.RLock()
+	require.Len(tc.batchPoints, 2)
+	require.Len(tc.batchPoints[0].Points(), 16)
+	require.Len(tc.batchPoints[1].Points(), 16)
+	tc.RUnlock()
+
+	require.NoError(b.Close())
+	tc.RLock()
+	require.Len(tc.batchPoints, 3)
+	require.Len(tc.batchPoints[2].Points(), 4)
+	tc.RUnlock()
+}
+
+// TestIntervalBatching checks that pending points are flushed on the interval
+// tick even when the length threshold has not been reached.
+func TestIntervalBatching(t *testing.T) {
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	tc := &testClient{}
+	b := newBatcher(re, tc, 1000, 500*time.Millisecond)
+
+	writePoints(t, b, 0, 10)
+
+	time.Sleep(2 * time.Second)
+
+	require := require.New(t)
+
+	// we should've received a single batch holding all ten points.
+	tc.RLock()
+	require.Len(tc.batchPoints, 1)
+	require.Len(tc.batchPoints[0].Points(), 10)
+	tc.RUnlock()
+
+	require.NoError(b.Close())
+	tc.RLock()
+	require.Len(tc.batchPoints, 1)
+	tc.RUnlock()
+}
+
+// TestBatchFailure checks that a batch whose write fails stays queued and is
+// re-sent, in order and ahead of later batches, once writes succeed again.
+func TestBatchFailure(t *testing.T) {
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	test := func(b *batcher) func(t *testing.T) {
+		tc := &testClient{}
+		b.client = tc
+
+		return func(t *testing.T) {
+
+			// Enable failures.
+			tc.EnableFail(true)
+
+			// Write three batches of 10 points each.
+			writePoints(t, b, 0, 10)
+			writePoints(t, b, 10, 10)
+			writePoints(t, b, 20, 10)
+
+			time.Sleep(2 * time.Second)
+
+			require := require.New(t)
+
+			// we should've received the same batch many times.
+			tc.RLock()
+			require.Greater(len(tc.batchPoints), 1)
+			assertPointsExactly(t, tc.batchPoints[0], 0, 10)
+			tc.RUnlock()
+
+			// get out of failure mode.
+			tc.EnableFail(false)
+
+			// wait for the retries to be done.
+			time.Sleep(2 * time.Second)
+
+			// now the last four elements should be:
+			// batch(0-9) (failed), batch(0-9) (ok), batch(10-19) (ok), batch(20-29) (ok)
+			tc.RLock()
+			// the indexing below needs at least four recorded batches.
+			require.GreaterOrEqual(len(tc.batchPoints), 4)
+			assertPointsExactly(t, tc.batchPoints[len(tc.batchPoints)-4], 0, 10)
+			assertPointsExactly(t, tc.batchPoints[len(tc.batchPoints)-3], 0, 10)
+			assertPointsExactly(t, tc.batchPoints[len(tc.batchPoints)-2], 10, 10)
+			assertPointsExactly(t, tc.batchPoints[len(tc.batchPoints)-1], 20, 10)
+			tc.RUnlock()
+		}
+	}
+
+	t.Run("batches_by_length", test(newBatcher(re, nil, 10, 24*time.Hour,
+		retry.Attempts(3),
+		retry.Delay(100*time.Millisecond),
+	)))
+
+	t.Run("batches_by_time", test(newBatcher(re, nil, 10, 100*time.Millisecond,
+		retry.Attempts(3),
+		retry.Delay(100*time.Millisecond),
+	)))
+
+}
+
+// writePoints writes count points to b, each carrying an integer field "i"
+// that counts up from offset.
+func writePoints(t *testing.T, b *batcher, offset, count int) {
+	t.Helper()
+
+	for i := offset; i < offset+count; i++ {
+		tags := map[string]string{}
+		fields := map[string]interface{}{
+			"i": i,
+		}
+		p, err := client.NewPoint("point", tags, fields)
+		if err != nil {
+			t.Fatal(err)
+		}
+		b.WritePoint(p)
+	}
+}
+
+// assertPointsExactly fails the test unless bp holds exactly length points
+// whose "i" fields are offset, offset+1, ... in order.
+func assertPointsExactly(t *testing.T, bp client.BatchPoints, offset, length int) {
+	t.Helper()
+
+	if l := len(bp.Points()); l != length {
+		t.Fatalf("length did not match; expected: %d, got %d", length, l)
+	}
+
+	for i, p := range bp.Points() {
+		f, _ := p.Fields()
+		if actual := f["i"].(int64); int64(i+offset) != actual {
+			t.Fatalf("comparison failed; expected: %d, got %d", i+offset, actual)
+		}
+	}
+}
diff --git a/runtime/influxdb_client.go b/runtime/influxdb_client.go
new file mode 100644
index 0000000..0c0e763
--- /dev/null
+++ b/runtime/influxdb_client.go
@@ -0,0 +1,65 @@
+package runtime
+
+import (
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/avast/retry-go"
+
+	_ "github.com/influxdata/influxdb1-client" // this is important because of the bug in go mod
+	client "github.com/influxdata/influxdb1-client/v2"
+)
+
+// EnvInfluxDBAddr is the name of the environment variable from which the
+// InfluxDB address is read.
+const EnvInfluxDBAddr = "INFLUXDB_ADDR"
+
+var (
+	// TestInfluxDBClient sets a client for testing. If this value is set,
+	// NewInfluxDBClient will always return it.
+	TestInfluxDBClient client.Client
+)
+
+// NewInfluxDBClient returns an InfluxDB client for the address in the
+// $INFLUXDB_ADDR environment variable, after verifying that the server
+// responds to a ping (up to five attempts; each retry is reported via
+// re.RecordMessage). If TestInfluxDBClient is set, it is returned as-is.
+//
+// When an error is returned, the returned client is nil.
+func NewInfluxDBClient(re *RunEnv) (client.Client, error) {
+	if TestInfluxDBClient != nil {
+		return TestInfluxDBClient, nil
+	}
+
+	addr := os.Getenv(EnvInfluxDBAddr)
+	if addr == "" {
+		return nil, fmt.Errorf("no InfluxDB URL in $%s env var", EnvInfluxDBAddr)
+	}
+
+	// Timeout is a time.Duration, so the unit must be spelled out; a bare 5
+	// would be five nanoseconds.
+	cfg := client.HTTPConfig{Addr: addr, Timeout: 5 * time.Second}
+	cli, err := client.NewHTTPClient(cfg)
+	if err != nil {
+		return nil, err
+	}
+
+	ping := func() error {
+		_, _, err := cli.Ping(2 * time.Second)
+		return err
+	}
+	err = retry.Do(ping,
+		retry.Attempts(5),
+		retry.MaxDelay(500*time.Millisecond),
+		retry.OnRetry(func(n uint, err error) {
+			re.RecordMessage("failed attempt number %d to ping InfluxDB at %s: %s", n, addr, err)
+		}),
+	)
+	if err != nil {
+		// the server is unreachable: release the client rather than leak it.
+		_ = cli.Close()
+		return nil, err
+	}
+	return cli, nil
+}
diff --git a/runtime/influxdb_client_test.go b/runtime/influxdb_client_test.go
new file mode 100644
index 0000000..76e505b
--- /dev/null
+++ b/runtime/influxdb_client_test.go
@@ -0,0 +1,64 @@
+package runtime
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	_ "github.com/influxdata/influxdb1-client"
+	client "github.com/influxdata/influxdb1-client/v2"
+)
+
+// testClient is an in-memory client.Client for tests. It records every batch
+// passed to Write and can be switched into a mode where Write returns an error.
+type testClient struct {
+	sync.RWMutex
+
+	fail        bool
+	batchPoints []client.BatchPoints
+}
+
+var _ client.Client = (*testClient)(nil)
+
+// EnableFail sets whether subsequent calls to Write return an error.
+func (t *testClient) EnableFail(fail bool) {
+	t.Lock()
+	defer t.Unlock()
+
+	t.fail = fail
+}
+
+// Ping always succeeds.
+func (t *testClient) Ping(_ time.Duration) (time.Duration, string, error) {
+	return 0, "", nil
+}
+
+// Write records bp (even when failing) and returns an error if failure mode
+// is enabled.
+func (t *testClient) Write(bp client.BatchPoints) error {
+	t.Lock()
+	defer t.Unlock()
+
+	t.batchPoints = append(t.batchPoints, bp)
+
+	var err error
+	if t.fail {
+		err = fmt.Errorf("error")
+	}
+	return err
+}
+
+// Query is a stub that returns nil, nil.
+func (t *testClient) Query(_ client.Query) (*client.Response, error) {
+	return nil, nil
+}
+
+// QueryAsChunk is a stub that returns nil, nil.
+func (t *testClient) QueryAsChunk(_ client.Query) (*client.ChunkedResponse, error) {
+	return nil, nil
+}
+
+// Close is a no-op.
+func (t *testClient) Close() error {
+	return nil
+}
diff --git a/runtime/metrics_api.go b/runtime/metrics_api.go
index 3b29000..2de5989 100644
--- a/runtime/metrics_api.go
+++ b/runtime/metrics_api.go
@@ -19,7 +19,7 @@ type (
 	Point     float64
 )
 
-type SinkFn func(m *Metric) error
+type MetricSinkFn func(m *Metric) error
 
 type MetricsApi struct {
 	// re is the RunEnv this MetricsApi object is attached to.
@@ -31,7 +31,7 @@ type MetricsApi struct {
 	// sinks to invoke when a new observation has been made.
 	//  1) data points are sent immediately.
 	//  2) aggregated metrics are sent periodically, based on freq.
-	sinks []SinkFn
+	sinks []MetricSinkFn
 
 	// freq is the frequency with which to materialize aggregated metrics.
 	freq time.Duration
@@ -43,7 +43,7 @@ type MetricsApi struct {
 
 type metricsApiOpts struct {
 	freq  time.Duration
-	sinks []SinkFn
+	sinks []MetricSinkFn
 }
 
 func newMetricsApi(re *RunEnv, opts metricsApiOpts) *MetricsApi {
@@ -111,7 +111,7 @@ func (m *MetricsApi) broadcast(name string, obj interface{}) {
 
 	for _, sink := range m.sinks {
 		if err := sink(metric); err != nil {
-			m.re.RecordMessage("failed to emit aggregated metric: %s", err)
+			m.re.RecordMessage("failed to emit metric: %s", err)
 		}
 	}
 }
diff --git a/runtime/metrics_sinks.go b/runtime/metrics_sinks.go
new file mode 100644
index 0000000..c29292c
--- /dev/null
+++ b/runtime/metrics_sinks.go
@@ -0,0 +1,41 @@
+package runtime
+
+import (
+	"encoding/json"
+	"time"
+
+	client "github.com/influxdata/influxdb1-client/v2"
+)
+
+// LogSinkJSON returns a sink that writes each metric, JSON-encoded, to the
+// raw asset called filename. It panics if the asset cannot be created.
+func LogSinkJSON(re *RunEnv, filename string) MetricSinkFn {
+	f, err := re.CreateRawAsset(filename)
+	if err != nil {
+		panic(err)
+	}
+
+	enc := json.NewEncoder(f)
+	return func(m *Metric) error {
+		return enc.Encode(m)
+	}
+}
+
+// WriteToInfluxDBSink returns a sink that turns each metric into an InfluxDB
+// point in measurement name, tagged with re.tags, with the metric's measures
+// as fields and its timestamp (nanoseconds) as the point time, and hands it
+// to re.batcher. re.batcher must be non-nil when the sink is invoked.
+//
+// NOTE(review): m.Name and m.Type are not passed to NewPoint, so metrics
+// sharing a sink do not appear to be distinguishable by name in InfluxDB —
+// confirm this is intended.
+func WriteToInfluxDBSink(re *RunEnv, name string) MetricSinkFn {
+	return func(m *Metric) error {
+		p, err := client.NewPoint(name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
+		if err != nil {
+			return err
+		}
+		re.batcher.WritePoint(p)
+		return nil
+	}
+}
diff --git a/runtime/runenv.go b/runtime/runenv.go
index 38f1e92..3c412cc 100644
--- a/runtime/runenv.go
+++ b/runtime/runenv.go
@@ -1,7 +1,6 @@
 package runtime
 
 import (
-	"context"
 	"encoding/json"
 	"os"
 	"path/filepath"
@@ -10,10 +9,26 @@ import (
 
 	"github.com/avast/retry-go"
 	"github.com/hashicorp/go-multierror"
-	influxdb2 "github.com/influxdata/influxdb-client-go"
+	_ "github.com/influxdata/influxdb1-client" // this is important because of the bug in go mod
+	client "github.com/influxdata/influxdb1-client/v2"
 	"go.uber.org/zap"
 )
 
+var (
+	InfluxBatching       = true
+	InfluxBatchLength    = 128
+	InfluxBatchInterval  = 1 * time.Second
+	InfluxBatchRetryOpts = func(re *RunEnv) []retry.Option {
+		return []retry.Option{
+			retry.Attempts(5),
+			retry.Delay(500 * time.Millisecond),
+			retry.OnRetry(func(n uint, err error) {
+				re.RecordMessage("failed to send batch to InfluxDB; attempt %d; err: %s", n, err)
+			}),
+		}
+	}
+)
+
 // RunEnv encapsulates the context for this test run.
 type RunEnv struct {
 	RunParams
@@ -22,8 +37,8 @@ type RunEnv struct {
 
 	diagnostics *MetricsApi
 	results     *MetricsApi
-	influxdb    influxdb2.InfluxDBClient
-	wapi        influxdb2.WriteApi
+	influxdb    client.Client
+	batcher     Batcher
 	tags        map[string]string
 
 	wg        sync.WaitGroup
@@ -58,8 +73,8 @@ func NewRunEnv(params RunParams) *RunEnv {
 	re.wg.Add(1)
 	go re.manageAssets()
 
-	var dsinks = []SinkFn{LogSinkJSON(re, "diagnostics.out")}
-	client, err := NewInfluxDBClient()
+	var dsinks = []MetricSinkFn{LogSinkJSON(re, "diagnostics.out")}
+	client, err := NewInfluxDBClient(re)
 	if err == nil {
 		re.tags = map[string]string{
 			"plan":     re.TestPlan,
@@ -69,11 +84,13 @@ func NewRunEnv(params RunParams) *RunEnv {
 		}
 
 		re.influxdb = client
-		re.wapi = client.WriteApi("testground", "diagnostics")
-		dsinks = append(dsinks, WriteToInfluxDB(re))
+		if InfluxBatching {
+			re.batcher = newBatcher(re, client, InfluxBatchLength, InfluxBatchInterval, InfluxBatchRetryOpts(re)...)
+		} else {
+			re.batcher = &nilBatcher{client}
+		}
 
-		re.wg.Add(1)
-		go re.monitorInfluxDBErrors()
+		dsinks = append(dsinks, WriteToInfluxDBSink(re, "diagnostics"))
 	} else {
 		re.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
 	}
@@ -85,7 +102,7 @@ func NewRunEnv(params RunParams) *RunEnv {
 
 	re.results = newMetricsApi(re, metricsApiOpts{
 		freq:  1 * time.Second,
-		sinks: []SinkFn{LogSinkJSON(re, "results.out")},
+		sinks: []MetricSinkFn{LogSinkJSON(re, "results.out")},
 	})
 
 	return re
@@ -101,22 +118,6 @@ func (re *RunEnv) D() *MetricsApi {
 	return re.diagnostics
 }
 
-func (re *RunEnv) monitorInfluxDBErrors() {
-	defer re.wg.Done()
-
-	for {
-		select {
-		case err := <-re.wapi.Errors():
-			if err == nil {
-				continue
-			}
-			re.RecordMessage("failed while writing to InfluxDB: %s", err)
-		case <-re.closeCh:
-			return
-		}
-	}
-}
-
 func (re *RunEnv) manageAssets() {
 	defer re.wg.Done()
 
@@ -152,20 +153,18 @@ func (re *RunEnv) Close() error {
 	err = multierror.Append(re.results.Close())
 
 	if re.influxdb != nil {
-		// Next, we reopen the results.out file, and upload all points to InfluxDB
-		// using the blocking API.
-		results, err2 := os.OpenFile(filepath.Join(re.TestOutputsPath, "results.out"), os.O_RDONLY, 0666)
-		if err2 == nil {
-			// batchInsertInfluxDB will record errors via runenv.RecordMessage().
-			err2 = re.batchInsertInfluxDB(results)
+		// Next, we reopen the results.out file, and write all points to InfluxDB.
+		results := filepath.Join(re.TestOutputsPath, "results.out")
+		if file, errf := os.OpenFile(results, os.O_RDONLY, 0666); errf == nil {
+			err = multierror.Append(err, re.batchInsertInfluxDB(file))
+		} else {
+			err = multierror.Append(err, errf)
 		}
-		err = multierror.Append(err, err2)
 	}
 
 	// Flush the immediate InfluxDB writer.
-	if re.wapi != nil {
-		re.wapi.Flush()
-		re.wapi.Close()
+	if re.batcher != nil {
+		err = multierror.Append(err, re.batcher.Close())
 	}
 
 	// This close stops monitoring the wapi errors channel, and closes assets.
@@ -175,7 +174,7 @@ func (re *RunEnv) Close() error {
 
 	// Now we're ready to close InfluxDB.
 	if re.influxdb != nil {
-		re.influxdb.Close()
+		err = multierror.Append(err, re.influxdb.Close())
 	}
 
 	if l := re.logger; l != nil {
@@ -186,12 +185,8 @@ func (re *RunEnv) Close() error {
 }
 
 func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
-	var (
-		count  int
-		points []*influxdb2.Point
-	)
+	sink := WriteToInfluxDBSink(re, "results")
 
-	wapib := re.influxdb.WriteApiBlocking("testground", "results")
 	for dec := json.NewDecoder(results); dec.More(); {
 		var m Metric
 		if err := dec.Decode(&m); err != nil {
@@ -199,33 +194,8 @@ func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
 			continue
 		}
 
-		// NewPoint copies all tags and fields, so this is thread-safe.
-		p := influxdb2.NewPoint(m.Name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
-		p.AddTag("type", m.Type.String())
-		points = append(points, p)
-		count++
-
-		// upload a batch every 500 points, or if this is the last point.
-		if count%500 == 0 || !dec.More() {
-			logger := func(n uint, err error) {
-				re.RecordMessage("failed to upload result points on attempt %d to InfluxDB: %s", n, err)
-			}
-
-			write := func() error {
-				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-				defer cancel()
-
-				return wapib.WritePoint(ctx, points...)
-			}
-
-			// retry 5 times, with a delay of 1 seconds, and the default jitter, logging each attempt
-			// into the runenv.
-			err := retry.Do(write, retry.Attempts(5), retry.Delay(1*time.Second), retry.OnRetry(logger))
-
-			if err != nil {
-				re.RecordMessage("failed completely to upload a batch of result points to InfluxDB: %s", err)
-			}
-			points = points[:0]
+		if err := sink(&m); err != nil {
+			re.RecordMessage("failed to process Metric from results.out: %s", err)
 		}
 	}
 	return nil
diff --git a/runtime/runenv_events.go b/runtime/runenv_events.go
index 4de67c1..8b4bb92 100644
--- a/runtime/runenv_events.go
+++ b/runtime/runenv_events.go
@@ -3,9 +3,8 @@ package runtime
 import (
 	"fmt"
 	"runtime/debug"
-	"time"
 
-	influxdb2 "github.com/influxdata/influxdb-client-go"
+	client "github.com/influxdata/influxdb1-client/v2"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
 )
@@ -111,12 +110,8 @@ func (re *RunEnv) RecordStart() {
 	}
 
 	re.logger.Info("", zap.Object("event", evt))
-	if re.wapi != nil {
-		f := map[string]interface{}{
-			"type": "instance.start",
-		}
-		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
-		re.wapi.WritePoint(p)
+	if re.batcher != nil {
+		re.recordEventInInfluxDB("instance.start", "", nil)
 	}
 }
 
@@ -127,12 +122,8 @@ func (re *RunEnv) RecordSuccess() {
 		Outcome: EventOutcomeOK,
 	}
 	re.logger.Info("", zap.Object("event", evt))
-	if re.wapi != nil {
-		f := map[string]interface{}{
-			"type": "instance.success",
-		}
-		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
-		re.wapi.WritePoint(p)
+	if re.batcher != nil {
+		re.recordEventInInfluxDB("instance.success", "", nil)
 	}
 }
 
@@ -145,14 +136,10 @@ func (re *RunEnv) RecordFailure(err error) {
 		Error:   err.Error(),
 	}
 	re.logger.Info("", zap.Object("event", evt))
-	if re.wapi != nil {
-		f := map[string]interface{}{
-			"type":    "instance.finish",
-			"outcome": "failed",
-			"error":   err.Error(),
-		}
-		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
-		re.wapi.WritePoint(p)
+	if re.batcher != nil {
+		re.recordEventInInfluxDB("instance.finish", "failed", map[string]interface{}{
+			"error": err.Error(),
+		})
 	}
 }
 
@@ -166,13 +153,38 @@ func (re *RunEnv) RecordCrash(err interface{}) {
 		Stacktrace: string(debug.Stack()),
 	}
 	re.logger.Error("", zap.Object("event", evt))
-	if re.wapi != nil {
-		f := map[string]interface{}{
-			"type":    "instance.finish",
-			"outcome": "crash",
-			"error":   fmt.Sprintf("%s", err),
-		}
-		p := influxdb2.NewPoint("lifecycle", re.tags, f, time.Now())
-		re.wapi.WritePoint(p)
+	if re.batcher != nil {
+		re.recordEventInInfluxDB("instance.finish", "crash", map[string]interface{}{
+			"error": fmt.Sprintf("%s", err),
+		})
+	}
+}
+
+// recordEventInInfluxDB writes a point to the "events" measurement, tagged
+// with the run's tags plus "type" (and "outcome", when non-empty). f holds
+// the point's fields and may be nil. The caller must ensure re.batcher is
+// non-nil. If the point cannot be created, a message is recorded and nothing
+// is written.
+func (re *RunEnv) recordEventInInfluxDB(typ string, outcome string, f map[string]interface{}) {
+	// this map copy is terrible; the influxdb v2 SDK makes points mutable.
+	tags := make(map[string]string, len(re.tags)+1)
+	for k, v := range re.tags {
+		tags[k] = v
+	}
+	tags["type"] = typ
+	if outcome != "" {
+		tags["outcome"] = outcome
+	}
+
+	if f == nil {
+		f = map[string]interface{}{}
+	}
+
+	p, err := client.NewPoint("events", tags, f)
+	if err != nil {
+		// NOTE(review): NewPoint presumably rejects points with no fields, in
+		// which case start/success events (nil f) always end up here — confirm.
+		re.RecordMessage("failed to create InfluxDB point: %s", err)
+		return
 	}
+	re.batcher.WritePoint(p)
 }
diff --git a/runtime/runenv_test.go b/runtime/runenv_test.go
index 45bd6ba..9da260a 100644
--- a/runtime/runenv_test.go
+++ b/runtime/runenv_test.go
@@ -1,7 +1,6 @@
 package runtime
 
 import (
-	"context"
 	"encoding/json"
 	"fmt"
 	"os"
@@ -141,7 +140,7 @@ func TestAllEvents(t *testing.T) {
 	}
 }
 
-func TestDiagnosticsMetricsRecorded(t *testing.T) {
+func TestMetricsRecordedInFile(t *testing.T) {
 	test := func(f func(*RunEnv) *MetricsApi, file string) func(t *testing.T) {
 		return func(t *testing.T) {
 			re, cleanup := RandomTestRunEnv(t)
@@ -208,22 +207,78 @@ func TestDiagnosticsMetricsRecorded(t *testing.T) {
 }
 
 func TestDiagnosticsDispatchedToInfluxDB(t *testing.T) {
-	skipIfNoLocalInfluxDB(t)
+	// swap in a recording client for this test only; restore the package-level
+	// settings afterwards so other tests are unaffected.
+	prevBatching, prevClient := InfluxBatching, TestInfluxDBClient
+	t.Cleanup(func() { InfluxBatching, TestInfluxDBClient = prevBatching, prevClient })
+	InfluxBatching = false
+	tc := &testClient{}
+	TestInfluxDBClient = tc
 
 	re, cleanup := RandomTestRunEnv(t)
 	t.Cleanup(cleanup)
 
 	re.D().RecordPoint("foo", 1234)
+	re.D().RecordPoint("foo", 1234)
+	re.D().RecordPoint("foo", 1234)
+	re.D().RecordPoint("foo", 1234)
+
+	require := require.New(t)
+
+	tc.RLock()
+	require.Len(tc.batchPoints, 4)
+	tc.RUnlock()
+
+	re.D().SetFrequency(500 * time.Millisecond)
+	re.D().NewCounter("counter").Inc(100)
+	re.D().NewHistogram("histogram1", re.D().NewUniformSample(100)).Update(123)
+
+	time.Sleep(1500 * time.Millisecond)
+
+	tc.RLock()
+	if l := len(tc.batchPoints); l != 6 && l != 8 && l != 10 {
+		t.Fatalf("expected length to be 6, 8, or 10; was: %d", l)
+	}
+	tc.RUnlock()
 
 	_ = re.Close()
 }
 
-func skipIfNoLocalInfluxDB(t *testing.T) {
-	if client, err := NewInfluxDBClient(); err != nil {
-		t.Skip()
-	} else {
-		client.Close()
-		setup, err := client.Setup(context.Background(), "foo", "foo", "testground", "foo", 0)
-		fmt.Println(setup, err)
-	}
+func TestResultsDispatchedOnClose(t *testing.T) {
+	// restore the package-level InfluxDB settings once this test is done.
+	prevBatching, prevClient := InfluxBatching, TestInfluxDBClient
+	t.Cleanup(func() { InfluxBatching, TestInfluxDBClient = prevBatching, prevClient })
+	InfluxBatching = false
+	tc := &testClient{}
+	TestInfluxDBClient = tc
+
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	re.R().RecordPoint("foo", 1234)
+	re.R().RecordPoint("foo", 1234)
+	re.R().RecordPoint("foo", 1234)
+	re.R().RecordPoint("foo", 1234)
+
+	require := require.New(t)
+
+	tc.RLock()
+	require.Empty(tc.batchPoints)
+	tc.RUnlock()
+
+	re.R().SetFrequency(500 * time.Millisecond)
+	re.R().NewCounter("counter").Inc(100)
+	re.R().NewHistogram("histogram1", re.D().NewUniformSample(100)).Update(123)
+
+	time.Sleep(1500 * time.Millisecond)
+
+	tc.RLock()
+	require.Empty(tc.batchPoints)
+	tc.RUnlock()
+
+	_ = re.Close()
+
+	tc.RLock()
+	require.NotEmpty(tc.batchPoints)
+	tc.RUnlock()
 }
diff --git a/runtime/sinks.go b/runtime/sinks.go
deleted file mode 100644
index c4b54c6..0000000
--- a/runtime/sinks.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package runtime
-
-import (
-	"encoding/json"
-	"time"
-
-	"github.com/influxdata/influxdb-client-go"
-)
-
-func LogSinkJSON(re *RunEnv, filename string) SinkFn {
-	f, err := re.CreateRawAsset(filename)
-	if err != nil {
-		panic(err)
-	}
-
-	enc := json.NewEncoder(f)
-	return func(m *Metric) error {
-		return enc.Encode(m)
-	}
-}
-
-func WriteToInfluxDB(re *RunEnv) SinkFn {
-	return func(m *Metric) error {
-		// NewPoint copies all tags and fields, so this is thread-safe.
-		p := influxdb2.NewPoint(m.Name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
-		p.AddTag("type", m.Type.String())
-		re.wapi.WritePoint(p)
-		return nil
-	}
-}
diff --git a/test/runenv.go b/test/runenv.go
deleted file mode 100644
index 75974ed..0000000
--- a/test/runenv.go
+++ /dev/null
@@ -1,47 +0,0 @@
-package test
-
-import (
-	"fmt"
-	"io/ioutil"
-	"math/rand"
-	"net"
-	"os"
-	"testing"
-	"time"
-
-	"github.com/testground/sdk-go/runtime"
-)
-
-// RandomRunEnv generates a random RunEnv for testing purposes.
-func RandomRunEnv(t *testing.T) (re *runtime.RunEnv, cleanup func()) {
-	t.Helper()
-
-	b := make([]byte, 32)
-	_, _ = rand.Read(b)
-
-	_, subnet, _ := net.ParseCIDR("127.1.0.1/16")
-
-	odir, err := ioutil.TempDir("", "testground-tests-*")
-	if err != nil {
-		t.Fatalf("failed to create temp output dir: %s", err)
-	}
-
-	rp := runtime.RunParams{
-		TestPlan:               fmt.Sprintf("testplan-%d", rand.Uint32()),
-		TestSidecar:            false,
-		TestCase:               fmt.Sprintf("testcase-%d", rand.Uint32()),
-		TestRun:                fmt.Sprintf("testrun-%d", rand.Uint32()),
-		TestSubnet:             &runtime.IPNet{IPNet: *subnet},
-		TestInstanceCount:      int(1 + (rand.Uint32() % 999)),
-		TestInstanceRole:       "",
-		TestInstanceParams:     make(map[string]string),
-		TestGroupID:            fmt.Sprintf("group-%d", rand.Uint32()),
-		TestStartTime:          time.Now(),
-		TestGroupInstanceCount: int(1 + (rand.Uint32() % 999)),
-		TestOutputsPath:        odir,
-	}
-
-	return runtime.NewRunEnv(rp), func() {
-		_ = os.RemoveAll(odir)
-	}
-}

From b040b9f514cf74d6312f5da6863fb1223e59f526 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Wed, 29 Apr 2020 15:08:48 +0100
Subject: [PATCH 06/13] go mod tidy.

---
 go.mod |  4 +++-
 go.sum | 36 ------------------------------------
 2 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/go.mod b/go.mod
index d8ecc41..bfaa946 100644
--- a/go.mod
+++ b/go.mod
@@ -7,11 +7,13 @@ require (
 	github.com/dustin/go-humanize v1.0.0
 	github.com/go-redis/redis/v7 v7.2.0
 	github.com/hashicorp/go-multierror v1.1.0
-	github.com/influxdata/influxdb-client-go v1.1.0
 	github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
+	github.com/pkg/errors v0.9.1 // indirect
 	github.com/prometheus/client_golang v1.5.1
 	github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
 	github.com/stretchr/testify v1.4.0
 	go.uber.org/zap v1.14.1
+	golang.org/x/net v0.0.0-20191112182307-2180aed22343 // indirect
 	golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
+	golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f // indirect
 )
diff --git a/go.sum b/go.sum
index 7edd436..5b9b1b7 100644
--- a/go.sum
+++ b/go.sum
@@ -12,20 +12,13 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/deepmap/oapi-codegen v1.3.6 h1:Wj44p9A0V0PJ+AUg0BWdyGcsS1LY18U+0rCuPQgK0+o=
-github.com/deepmap/oapi-codegen v1.3.6/go.mod h1:aBozjEveG+33xPiP55Iw/XbVkhtZHEGLq3nxlX0+hfU=
-github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
-github.com/getkin/kin-openapi v0.2.0/go.mod h1:V1z9xl9oF5Wt7v32ne4FmiF1alpS4dM6mNzoywPOXlk=
-github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
-github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
@@ -38,7 +31,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
 github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -50,12 +42,8 @@ github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g
 github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
-github.com/influxdata/influxdb-client-go v1.1.0 h1:ht1HvNAfBuwY9/H0i1tOiVH4vHpkZ3gFwYD5j/xvyFA=
-github.com/influxdata/influxdb-client-go v1.1.0/go.mod h1:ZVjaPW87aKp5hzyny2WVpWVF0UY+iqtPz9veOZ2T1zw=
 github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
 github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
-github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
-github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
@@ -67,18 +55,6 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
-github.com/labstack/echo/v4 v4.1.11 h1:z0BZoArY4FqdpUEl+wlHp4hnr/oSR6MTmQmv8OHSoww=
-github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g=
-github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0=
-github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
-github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ=
-github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
-github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
-github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
-github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
-github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
-github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10=
-github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
 github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -124,11 +100,6 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
-github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
-github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
-github.com/valyala/fasttemplate v1.1.0 h1:RZqt0yGBsps8NGvLSGW804QQqCUYYLsaOjTVHy1Ocw4=
-github.com/valyala/fasttemplate v1.1.0/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
 go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
 go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
 go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
@@ -140,9 +111,6 @@ go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708 h1:pXVtWnwHkrWD9ru3sDxY/qFK/bfc0egRovX91EjWjf4=
-golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
@@ -167,13 +135,9 @@ golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

From 12bb0278516245d8cdd7d9a0c2dbfdc635d09da4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Wed, 29 Apr 2020 18:18:19 +0100
Subject: [PATCH 07/13] simplify and clean up solution.

---
 runtime/influxdb_client.go |  24 +----
 runtime/metrics.go         | 177 +++++++++++++++++++++++++++++++++++++
 runtime/metrics_api.go     |  17 +++-
 runtime/metrics_sinks.go   |  31 -------
 runtime/metrics_types.go   |   8 ++
 runtime/runenv.go          |  93 ++-----------------
 runtime/runenv_events.go   |  43 +--------
 runtime/runenv_http.go     |  67 ++++++++++++++
 8 files changed, 281 insertions(+), 179 deletions(-)
 create mode 100644 runtime/metrics.go
 delete mode 100644 runtime/metrics_sinks.go
 create mode 100644 runtime/runenv_http.go

diff --git a/runtime/influxdb_client.go b/runtime/influxdb_client.go
index 0c0e763..faf99f0 100644
--- a/runtime/influxdb_client.go
+++ b/runtime/influxdb_client.go
@@ -5,8 +5,6 @@ import (
 	"os"
 	"time"
 
-	"github.com/avast/retry-go"
-
 	_ "github.com/influxdata/influxdb1-client" // this is important because of the bug in go mod
 	client "github.com/influxdata/influxdb1-client/v2"
 )
@@ -26,25 +24,9 @@ func NewInfluxDBClient(re *RunEnv) (client.Client, error) {
 
 	addr := os.Getenv(EnvInfluxDBAddr)
 	if addr == "" {
-		return nil, fmt.Errorf("no InfluxDB URL in $%s env var", EnvInfluxDBAddr)
-	}
-
-	cfg := client.HTTPConfig{Addr: addr, Timeout: 5}
-	client, err := client.NewHTTPClient(cfg)
-	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("no InfluxDB address in $%s env var", EnvInfluxDBAddr)
 	}
 
-	ping := func() error {
-		_, _, err := client.Ping(2 * time.Second)
-		return err
-	}
-	err = retry.Do(ping,
-		retry.Attempts(5),
-		retry.MaxDelay(500*time.Millisecond),
-		retry.OnRetry(func(n uint, err error) {
-			re.RecordMessage("failed attempt number %d to ping InfluxDB at %s: %s", n, addr, err)
-		}),
-	)
-	return client, err
+	cfg := client.HTTPConfig{Addr: addr, Timeout: 5 * time.Second}
+	return client.NewHTTPClient(cfg)
 }
diff --git a/runtime/metrics.go b/runtime/metrics.go
new file mode 100644
index 0000000..3d038dd
--- /dev/null
+++ b/runtime/metrics.go
@@ -0,0 +1,177 @@
+package runtime
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/hashicorp/go-multierror"
+	_ "github.com/influxdata/influxdb1-client"
+	client "github.com/influxdata/influxdb1-client/v2"
+	"github.com/rcrowley/go-metrics"
+)
+
+type Metrics struct {
+	re          *RunEnv           // run environment these metrics belong to
+	diagnostics *MetricsApi       // returned by D(); also sent to InfluxDB when available
+	results     *MetricsApi       // returned by R(); logged to results.out
+	influxdb    client.Client     // nil when no InfluxDB client could be created
+	batcher     Batcher           // receives every InfluxDB point; nil when influxdb is nil
+	tags        map[string]string // plan/case/run/group_id tags; set only when influxdb is non-nil
+}
+
+func newMetrics(re *RunEnv) *Metrics {
+	m := &Metrics{re: re}
+	// diagnostics always go to diagnostics.out; InfluxDB is added only if a client can be created.
+	var dsinks = []MetricSinkFn{m.logSinkJSON("diagnostics.out")}
+	if client, err := NewInfluxDBClient(re); err == nil {
+		m.tags = map[string]string{
+			"plan":     re.TestPlan,
+			"case":     re.TestCase,
+			"run":      re.TestRun,
+			"group_id": re.TestGroupID,
+		}
+		// NOTE: within this block, client is the local variable, shadowing the imported package.
+		m.influxdb = client
+		if InfluxBatching {
+			m.batcher = newBatcher(re, client, InfluxBatchLength, InfluxBatchInterval, InfluxBatchRetryOpts(re)...)
+		} else {
+			m.batcher = &nilBatcher{client}
+		}
+
+		dsinks = append(dsinks, m.writeToInfluxDBSink("diagnostics"))
+	} else {
+		re.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
+	}
+	// diagnostics start at a 5s frequency and capture Go runtime memory stats via the callback.
+	m.diagnostics = newMetricsApi(re, metricsApiOpts{
+		freq:        5 * time.Second,
+		preregister: metrics.RegisterRuntimeMemStats,
+		callbacks:   []func(metrics.Registry){metrics.CaptureRuntimeMemStatsOnce},
+		sinks:       dsinks,
+	})
+	// results are only logged to results.out here; Close replays that file into InfluxDB.
+	m.results = newMetricsApi(re, metricsApiOpts{
+		freq:  1 * time.Second,
+		sinks: []MetricSinkFn{m.logSinkJSON("results.out")},
+	})
+
+	return m
+}
+
+func (m *Metrics) R() *MetricsApi {
+	return m.results // results API; newMetrics wires it to the results.out sink
+}
+
+func (m *Metrics) D() *MetricsApi {
+	return m.diagnostics // diagnostics API; newMetrics wires it to diagnostics.out (and InfluxDB)
+}
+
+// Close shuts down both metrics APIs, replays results.out into InfluxDB (when
+// a client exists), flushes the batcher and closes the client, in that order.
+func (m *Metrics) Close() error {
+	var err *multierror.Error
+
+	// close diagnostics; this stops the ticker and any further observations on
+	// runenv.D() will fail/panic.
+	err = multierror.Append(err, m.diagnostics.Close())
+
+	// close results; no more results via runenv.R() can be recorded.
+	err = multierror.Append(err, m.results.Close())
+
+	if m.influxdb != nil {
+		// Next, we reopen the results.out file, and write all points to InfluxDB.
+		results := filepath.Join(m.re.TestOutputsPath, "results.out")
+		if file, errf := os.OpenFile(results, os.O_RDONLY, 0666); errf == nil {
+			// release the descriptor once replayed; Append drops nil errors.
+			err = multierror.Append(err, m.batchInsertInfluxDB(file), file.Close())
+		} else {
+			err = multierror.Append(err, errf)
+		}
+	}
+	// Flush the immediate InfluxDB writer.
+	if m.batcher != nil {
+		err = multierror.Append(err, m.batcher.Close())
+	}
+	// Now we're ready to close InfluxDB.
+	if m.influxdb != nil {
+		err = multierror.Append(err, m.influxdb.Close())
+	}
+	return err.ErrorOrNil()
+}
+
+// batchInsertInfluxDB replays every Metric stored in results through the
+// InfluxDB sink. It stops at the first decode error: a json.Decoder keeps
+// reporting it without consuming input, so retrying would never terminate.
+func (m *Metrics) batchInsertInfluxDB(results *os.File) error {
+	sink := m.writeToInfluxDBSink("results")
+	for dec := json.NewDecoder(results); dec.More(); {
+		var me Metric
+		if err := dec.Decode(&me); err != nil {
+			return err
+		}
+		if err := sink(&me); err != nil {
+			m.re.RecordMessage("failed to process Metric from results.out: %s", err)
+		}
+	}
+	return nil
+}
+
+// logSinkJSON returns a sink that JSON-encodes each Metric into the raw asset filename; it panics if the asset cannot be created.
+func (m *Metrics) logSinkJSON(filename string) MetricSinkFn {
+	out, err := m.re.CreateRawAsset(filename)
+	if err != nil {
+		panic(err)
+	}
+	encoder := json.NewEncoder(out)
+	return func(metric *Metric) error {
+		return encoder.Encode(metric)
+	}
+}
+
+func (m *Metrics) writeToInfluxDBSink(collection string) MetricSinkFn {
+	return func(me *Metric) error {
+		// this map copy is terrible; the influxdb v2 SDK makes points mutable.
+		tags := make(map[string]string, len(m.tags)+1)
+		for k, v := range m.tags {
+			tags[k] = v
+		}
+		tags["metric"] = me.Name
+
+		p, err := client.NewPoint(collection, tags, me.Measures, time.Unix(0, me.Timestamp))
+		if err != nil {
+			return err
+		}
+		m.batcher.WritePoint(p)
+		return nil
+	}
+}
+
+// recordEvent writes evt to the InfluxDB "events" measurement, tagged with the
+// run tags plus the event type and outcome; it is a no-op without InfluxDB.
+func (m *Metrics) recordEvent(evt *Event) {
+	if m.influxdb == nil {
+		return
+	}
+
+	// this map copy is terrible; the influxdb v2 SDK makes points mutable.
+	tags := make(map[string]string, len(m.tags)+2)
+	for k, v := range m.tags {
+		tags[k] = v
+	}
+	tags["type"] = string(evt.Type)
+	if evt.Outcome != "" {
+		tags["outcome"] = string(evt.Outcome)
+	}
+
+	f := map[string]interface{}{"error": evt.Error}
+
+	p, err := client.NewPoint("events", tags, f)
+	if err != nil {
+		// NewPoint yields no usable point on error; don't hand it to the batcher.
+		m.re.RecordMessage("failed to create InfluxDB point: %s", err)
+		return
+	}
+	m.batcher.WritePoint(p)
+}
diff --git a/runtime/metrics_api.go b/runtime/metrics_api.go
index 2de5989..5230f22 100644
--- a/runtime/metrics_api.go
+++ b/runtime/metrics_api.go
@@ -36,14 +36,19 @@ type MetricsApi struct {
 	// freq is the frequency with which to materialize aggregated metrics.
 	freq time.Duration
 
+	// callbacks are callback functions to call on every tick.
+	callbacks []func(registry metrics.Registry)
+
 	wg           sync.WaitGroup
 	freqChangeCh chan time.Duration
 	doneCh       chan struct{}
 }
 
 type metricsApiOpts struct {
-	freq  time.Duration
-	sinks []MetricSinkFn
+	freq        time.Duration
+	preregister func(registry metrics.Registry)
+	callbacks   []func(registry metrics.Registry)
+	sinks       []MetricSinkFn
 }
 
 func newMetricsApi(re *RunEnv, opts metricsApiOpts) *MetricsApi {
@@ -52,10 +57,15 @@ func newMetricsApi(re *RunEnv, opts metricsApiOpts) *MetricsApi {
 		reg:          metrics.NewRegistry(),
 		sinks:        opts.sinks,
 		freq:         opts.freq,
+		callbacks:    opts.callbacks,
 		freqChangeCh: make(chan time.Duration),
 		doneCh:       make(chan struct{}),
 	}
 
+	if opts.preregister != nil {
+		opts.preregister(m.reg)
+	}
+
 	m.wg.Add(1)
 	go m.background()
 	return m
@@ -92,6 +102,9 @@ func (m *MetricsApi) background() {
 	for {
 		select {
 		case <-c:
+			for _, a := range m.callbacks {
+				a(m.reg)
+			}
 			m.reg.Each(m.broadcast)
 
 		case f := <-m.freqChangeCh:
diff --git a/runtime/metrics_sinks.go b/runtime/metrics_sinks.go
deleted file mode 100644
index c29292c..0000000
--- a/runtime/metrics_sinks.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package runtime
-
-import (
-	"encoding/json"
-	"time"
-
-	client "github.com/influxdata/influxdb1-client/v2"
-)
-
-func LogSinkJSON(re *RunEnv, filename string) MetricSinkFn {
-	f, err := re.CreateRawAsset(filename)
-	if err != nil {
-		panic(err)
-	}
-
-	enc := json.NewEncoder(f)
-	return func(m *Metric) error {
-		return enc.Encode(m)
-	}
-}
-
-func WriteToInfluxDBSink(re *RunEnv, name string) MetricSinkFn {
-	return func(m *Metric) error {
-		p, err := client.NewPoint(name, re.tags, m.Measures, time.Unix(0, m.Timestamp))
-		if err != nil {
-			return err
-		}
-		re.batcher.WritePoint(p)
-		return nil
-	}
-}
diff --git a/runtime/metrics_types.go b/runtime/metrics_types.go
index c5c37a1..12e1b21 100644
--- a/runtime/metrics_types.go
+++ b/runtime/metrics_types.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"sync"
 	"time"
+
+	"github.com/rcrowley/go-metrics"
 )
 
 type MetricType int
@@ -95,6 +97,12 @@ func NewMetric(name string, i interface{}) *Metric {
 		s := v.Snapshot()
 		m.Measures["value"] = s.Value()
 
+	case metrics.Gauge:
+		t = MetricGauge
+		m = pools[t].Get().(*Metric)
+		s := v.Snapshot()
+		m.Measures["value"] = float64(s.Value())
+
 	case Histogram:
 		t = MetricHistogram
 		m = pools[t].Get().(*Metric)
diff --git a/runtime/runenv.go b/runtime/runenv.go
index 3c412cc..243f5a7 100644
--- a/runtime/runenv.go
+++ b/runtime/runenv.go
@@ -1,16 +1,13 @@
 package runtime
 
 import (
-	"encoding/json"
 	"os"
-	"path/filepath"
 	"sync"
 	"time"
 
 	"github.com/avast/retry-go"
 	"github.com/hashicorp/go-multierror"
 	_ "github.com/influxdata/influxdb1-client" // this is important because of the bug in go mod
-	client "github.com/influxdata/influxdb1-client/v2"
 	"go.uber.org/zap"
 )
 
@@ -33,13 +30,8 @@ var (
 type RunEnv struct {
 	RunParams
 
-	logger *zap.Logger
-
-	diagnostics *MetricsApi
-	results     *MetricsApi
-	influxdb    client.Client
-	batcher     Batcher
-	tags        map[string]string
+	logger  *zap.Logger
+	metrics *Metrics
 
 	wg        sync.WaitGroup
 	closeCh   chan struct{}
@@ -73,49 +65,19 @@ func NewRunEnv(params RunParams) *RunEnv {
 	re.wg.Add(1)
 	go re.manageAssets()
 
-	var dsinks = []MetricSinkFn{LogSinkJSON(re, "diagnostics.out")}
-	client, err := NewInfluxDBClient(re)
-	if err == nil {
-		re.tags = map[string]string{
-			"plan":     re.TestPlan,
-			"case":     re.TestCase,
-			"run":      re.TestRun,
-			"group_id": re.TestGroupID,
-		}
-
-		re.influxdb = client
-		if InfluxBatching {
-			re.batcher = newBatcher(re, client, InfluxBatchLength, InfluxBatchInterval, InfluxBatchRetryOpts(re)...)
-		} else {
-			re.batcher = &nilBatcher{client}
-		}
-
-		dsinks = append(dsinks, WriteToInfluxDBSink(re, "diagnostics"))
-	} else {
-		re.RecordMessage("InfluxDB unavailable; no metrics will be dispatched: %s", err)
-	}
-
-	re.diagnostics = newMetricsApi(re, metricsApiOpts{
-		freq:  1 * time.Second,
-		sinks: dsinks,
-	})
-
-	re.results = newMetricsApi(re, metricsApiOpts{
-		freq:  1 * time.Second,
-		sinks: []MetricSinkFn{LogSinkJSON(re, "results.out")},
-	})
+	re.metrics = newMetrics(re)
 
 	return re
 }
 
 // R returns a metrics object for results.
 func (re *RunEnv) R() *MetricsApi {
-	return re.results
+	return re.metrics.R()
 }
 
 // D returns a metrics object for diagnostics.
 func (re *RunEnv) D() *MetricsApi {
-	return re.diagnostics
+	return re.metrics.D()
 }
 
 func (re *RunEnv) manageAssets() {
@@ -145,38 +107,14 @@ func (re *RunEnv) manageAssets() {
 func (re *RunEnv) Close() error {
 	var err *multierror.Error
 
-	// close diagnostics; this stops the ticker and any further observations on
-	// runenv.D() will fail/panic.
-	err = multierror.Append(re.diagnostics.Close())
-
-	// close results; no more results via runenv.R() can be recorded.
-	err = multierror.Append(re.results.Close())
-
-	if re.influxdb != nil {
-		// Next, we reopen the results.out file, and write all points to InfluxDB.
-		results := filepath.Join(re.TestOutputsPath, "results.out")
-		if file, errf := os.OpenFile(results, os.O_RDONLY, 0666); errf == nil {
-			err = multierror.Append(err, re.batchInsertInfluxDB(file))
-		} else {
-			err = multierror.Append(err, errf)
-		}
-	}
-
-	// Flush the immediate InfluxDB writer.
-	if re.batcher != nil {
-		err = multierror.Append(err, re.batcher.Close())
-	}
+	// close metrics.
+	err = multierror.Append(err, re.metrics.Close())
 
 	// This close stops monitoring the wapi errors channel, and closes assets.
 	close(re.closeCh)
 	re.wg.Wait()
 	err = multierror.Append(err, re.assetsErr)
 
-	// Now we're ready to close InfluxDB.
-	if re.influxdb != nil {
-		err = multierror.Append(err, re.influxdb.Close())
-	}
-
 	if l := re.logger; l != nil {
 		_ = l.Sync()
 	}
@@ -184,23 +122,6 @@ func (re *RunEnv) Close() error {
 	return err.ErrorOrNil()
 }
 
-func (re *RunEnv) batchInsertInfluxDB(results *os.File) error {
-	sink := WriteToInfluxDBSink(re, "results")
-
-	for dec := json.NewDecoder(results); dec.More(); {
-		var m Metric
-		if err := dec.Decode(&m); err != nil {
-			re.RecordMessage("failed to decode Metric from results.out: %s", err)
-			continue
-		}
-
-		if err := sink(&m); err != nil {
-			re.RecordMessage("failed to process Metric from results.out: %s", err)
-		}
-	}
-	return nil
-}
-
 // CurrentRunEnv populates a test context from environment vars.
 func CurrentRunEnv() *RunEnv {
 	re, _ := ParseRunEnv(os.Environ())
diff --git a/runtime/runenv_events.go b/runtime/runenv_events.go
index 8b4bb92..274aec9 100644
--- a/runtime/runenv_events.go
+++ b/runtime/runenv_events.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"runtime/debug"
 
-	client "github.com/influxdata/influxdb1-client/v2"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
 )
@@ -110,9 +109,7 @@ func (re *RunEnv) RecordStart() {
 	}
 
 	re.logger.Info("", zap.Object("event", evt))
-	if re.batcher != nil {
-		re.recordEventInInfluxDB("instance.start", "", nil)
-	}
+	re.metrics.recordEvent(&evt)
 }
 
 // RecordSuccess records that the calling instance succeeded.
@@ -122,9 +119,7 @@ func (re *RunEnv) RecordSuccess() {
 		Outcome: EventOutcomeOK,
 	}
 	re.logger.Info("", zap.Object("event", evt))
-	if re.batcher != nil {
-		re.recordEventInInfluxDB("instance.success", "", nil)
-	}
+	re.metrics.recordEvent(&evt)
 }
 
 // RecordFailure records that the calling instance failed with the supplied
@@ -136,11 +131,7 @@ func (re *RunEnv) RecordFailure(err error) {
 		Error:   err.Error(),
 	}
 	re.logger.Info("", zap.Object("event", evt))
-	if re.batcher != nil {
-		re.recordEventInInfluxDB("instance.finish", "failed", map[string]interface{}{
-			"error": err.Error(),
-		})
-	}
+	re.metrics.recordEvent(&evt)
 }
 
 // RecordCrash records that the calling instance crashed/panicked with the
@@ -153,31 +144,5 @@ func (re *RunEnv) RecordCrash(err interface{}) {
 		Stacktrace: string(debug.Stack()),
 	}
 	re.logger.Error("", zap.Object("event", evt))
-	if re.batcher != nil {
-		re.recordEventInInfluxDB("instance.finish", "crash", map[string]interface{}{
-			"error": fmt.Sprintf("%s", err),
-		})
-	}
-}
-
-func (re *RunEnv) recordEventInInfluxDB(typ string, outcome string, f map[string]interface{}) {
-	// this map copy is terrible; the influxdb v2 SDK makes points mutable.
-	tags := make(map[string]string, len(re.tags)+1)
-	for k, v := range re.tags {
-		tags[k] = v
-	}
-	tags["type"] = typ
-	if outcome != "" {
-		tags["outcome"] = outcome
-	}
-
-	if f == nil {
-		f = map[string]interface{}{}
-	}
-
-	p, err := client.NewPoint("events", tags, f)
-	if err != nil {
-		re.RecordMessage("failed to create InfluxDB point: %s", err)
-	}
-	re.batcher.WritePoint(p)
+	re.metrics.recordEvent(&evt)
 }
diff --git a/runtime/runenv_http.go b/runtime/runenv_http.go
new file mode 100644
index 0000000..eee2d5a
--- /dev/null
+++ b/runtime/runenv_http.go
@@ -0,0 +1,67 @@
+package runtime
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"os"
+	"path"
+	"strconv"
+	"time"
+)
+
+// HTTPPeriodicSnapshots GETs addr every dur until ctx is done and stores each 2xx
+// response body in outDir (under the test outputs path) as <unix-timestamp>.out.
+func (re *RunEnv) HTTPPeriodicSnapshots(ctx context.Context, addr string, dur time.Duration, outDir string) error {
+	err := os.MkdirAll(path.Join(re.TestOutputsPath, outDir), 0777)
+	if err != nil {
+		return err
+	}
+
+	nextFile := func() (*os.File, error) {
+		timestamp := strconv.FormatInt(time.Now().Unix(), 10)
+		return os.Create(path.Join(re.TestOutputsPath, outDir, timestamp+".out"))
+	}
+
+	go func() {
+		ticker := time.NewTicker(dur)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				func() {
+					req, err := http.NewRequestWithContext(ctx, "GET", addr, nil)
+					if err != nil {
+						re.RecordMessage("error while creating http request: %v", err)
+						return
+					}
+					resp, err := http.DefaultClient.Do(req)
+					if err != nil {
+						re.RecordMessage("error while scraping http endpoint: %v", err)
+						return
+					}
+					defer resp.Body.Close()
+					// an error page is not a snapshot; skip it rather than persist it.
+					if resp.StatusCode < 200 || resp.StatusCode > 299 {
+						re.RecordMessage("error while scraping http endpoint: unexpected status %s", resp.Status)
+						return
+					}
+					file, err := nextFile()
+					if err != nil {
+						re.RecordMessage("error while getting metrics output file: %v", err)
+						return
+					}
+					defer file.Close()
+					if _, err = io.Copy(file, resp.Body); err != nil {
+						re.RecordMessage("error while copying data to file: %v", err)
+					}
+				}()
+			}
+		}
+	}()
+
+	return nil
+}

From 829111209cba2af38892f479cc40f831ab65dc2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Wed, 29 Apr 2020 22:18:39 +0100
Subject: [PATCH 08/13] adjust points translation.

---
 runtime/influxdb_batch.go | 12 ++++++++++++
 runtime/metrics.go        | 16 ++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/runtime/influxdb_batch.go b/runtime/influxdb_batch.go
index 57dd746..707492f 100644
--- a/runtime/influxdb_batch.go
+++ b/runtime/influxdb_batch.go
@@ -82,6 +82,9 @@ func (b *batcher) background() {
 		case err := <-b.sendRes:
 			if err == nil {
 				b.pending = b.pending[len(b.sending):]
+				b.re.RecordMessage("influxdb: uploaded %d points", len(b.sending))
+			} else {
+				b.re.RecordMessage("influxdb: failed to upload %d points; err: %s", len(b.sending), err)
 			}
 			b.sending = nil
 			if len(b.pending) >= b.length {
@@ -110,6 +113,9 @@ func (b *batcher) background() {
 				// we are currently sending, wait for the send to finish first.
 				if err := <-b.sendRes; err == nil {
 					b.pending = b.pending[len(b.sending):]
+					b.re.RecordMessage("influxdb: uploaded %d points", len(b.sending))
+				} else {
+					b.re.RecordMessage("influxdb: failed to upload %d points; err: %s", len(b.sending), err)
 				}
 			}
 
@@ -119,6 +125,12 @@ func (b *batcher) background() {
 				b.sending = b.pending
 				go b.send()
 				err = <-b.sendRes
+				if err == nil {
+					b.re.RecordMessage("influxdb: uploaded %d points", len(b.sending))
+				} else {
+					b.re.RecordMessage("influxdb: failed to upload %d points; err: %s", len(b.sending), err)
+				}
+				b.sending = nil
 			}
 			b.doneErr <- err
 			return
diff --git a/runtime/metrics.go b/runtime/metrics.go
index 3d038dd..a071f0c 100644
--- a/runtime/metrics.go
+++ b/runtime/metrics.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/hashicorp/go-multierror"
@@ -130,16 +131,15 @@ func (m *Metrics) logSinkJSON(filename string) MetricSinkFn {
 	}
 }
 
-func (m *Metrics) writeToInfluxDBSink(collection string) MetricSinkFn {
-	return func(me *Metric) error {
-		// this map copy is terrible; the influxdb v2 SDK makes points mutable.
-		tags := make(map[string]string, len(m.tags)+1)
-		for k, v := range m.tags {
-			tags[k] = v
+func (m *Metrics) writeToInfluxDBSink(measurement string) MetricSinkFn {
+	return func(metric *Metric) error {
+		fields := make(map[string]interface{}, len(metric.Measures))
+		for k, v := range metric.Measures {
+			key := strings.Join([]string{metric.Name, metric.Type.String(), k}, ".")
+			fields[key] = v
 		}
-		tags["metric"] = me.Name
 
-		p, err := client.NewPoint(collection, tags, me.Measures, time.Unix(0, me.Timestamp))
+		p, err := client.NewPoint(measurement, m.tags, fields, time.Unix(0, metric.Timestamp))
 		if err != nil {
 			return err
 		}

From 6fc3a0798158ba11cb81757d321e3bc9a50b0e8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Thu, 30 Apr 2020 11:14:57 +0100
Subject: [PATCH 09/13] test: remove setting INFLUXDB_URL env var.

---
 runtime/runenv_test.go | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/runtime/runenv_test.go b/runtime/runenv_test.go
index 9da260a..b22872f 100644
--- a/runtime/runenv_test.go
+++ b/runtime/runenv_test.go
@@ -13,10 +13,6 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func init() {
-	_ = os.Setenv("INFLUXDB_URL", "http://localhost:9999")
-}
-
 func TestParseKeyValues(t *testing.T) {
 	type args struct {
 		in []string

From 429e8794f3f89424ca497afe4701b3b0a613ac28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Thu, 30 Apr 2020 11:18:31 +0100
Subject: [PATCH 10/13] rename INFLUXDB_ADDR env var to INFLUXDB_URL for
 correctness.

---
 runtime/influxdb_client.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/runtime/influxdb_client.go b/runtime/influxdb_client.go
index faf99f0..46729c2 100644
--- a/runtime/influxdb_client.go
+++ b/runtime/influxdb_client.go
@@ -9,7 +9,7 @@ import (
 	client "github.com/influxdata/influxdb1-client/v2"
 )
 
-const EnvInfluxDBAddr = "INFLUXDB_ADDR"
+const EnvInfluxDBURL = "INFLUXDB_URL"
 
 var (
 	// TestInfluxDBClient sets a client for testing. If this value is set,
@@ -22,9 +22,9 @@ func NewInfluxDBClient(re *RunEnv) (client.Client, error) {
 		return TestInfluxDBClient, nil
 	}
 
-	addr := os.Getenv(EnvInfluxDBAddr)
+	addr := os.Getenv(EnvInfluxDBURL)
 	if addr == "" {
-		return nil, fmt.Errorf("no InfluxDB address in $%s env var", EnvInfluxDBAddr)
+		return nil, fmt.Errorf("no InfluxDB URL in $%s env var", EnvInfluxDBURL)
 	}
 
 	cfg := client.HTTPConfig{Addr: addr, Timeout: 5 * time.Second}

From dc538529eb6e0ee69a72fe528bba26d118fce75d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Thu, 30 Apr 2020 11:44:02 +0100
Subject: [PATCH 11/13] add TestFrequencyChange test.

---
 runtime/runenv_test.go | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/runtime/runenv_test.go b/runtime/runenv_test.go
index b22872f..002829e 100644
--- a/runtime/runenv_test.go
+++ b/runtime/runenv_test.go
@@ -271,3 +271,32 @@ func TestResultsDispatchedOnClose(t *testing.T) {
 	require.NotEmpty(tc.batchPoints)
 	tc.RUnlock()
 }
+
+func TestFrequencyChange(t *testing.T) {
+	InfluxBatching = false
+	tc := &testClient{}
+	TestInfluxDBClient = tc
+
+	re, cleanup := RandomTestRunEnv(t)
+	t.Cleanup(cleanup)
+
+	// set an abnormally high frequency to verify that no points are produced.
+	re.D().SetFrequency(24 * time.Hour)
+	counter := re.D().NewCounter("foo")
+	counter.Inc(100)
+
+	require := require.New(t)
+	// SetFrequency takes a time.Duration (an interval); with 24h set, no points are expected after this 1.5s wait.
+	time.Sleep(1500 * time.Millisecond)
+	// NOTE(review): a failing require aborts the test before RUnlock runs; confirm cleanup never needs tc's lock.
+	tc.RLock()
+	require.Empty(tc.batchPoints)
+	tc.RUnlock()
+	// switch to a 100ms interval and wait 1s; more than 5 recorded points are expected afterwards.
+	re.D().SetFrequency(100 * time.Millisecond)
+	time.Sleep(1000 * time.Millisecond)
+
+	tc.RLock()
+	require.Greater(len(tc.batchPoints), 5)
+	tc.RUnlock()
+}

From f69054c9c7b8ece640799da485dfd0152f15a0d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Thu, 30 Apr 2020 11:48:10 +0100
Subject: [PATCH 12/13] remove New prefix from aggregated metrics accessors.

---
 runtime/metrics_api.go | 14 +++++++-------
 runtime/runenv_test.go | 16 ++++++++--------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/runtime/metrics_api.go b/runtime/metrics_api.go
index 5230f22..c6eb955 100644
--- a/runtime/metrics_api.go
+++ b/runtime/metrics_api.go
@@ -144,31 +144,31 @@ func (m *MetricsApi) RecordPoint(name string, value float64) {
 	m.broadcast(name, Point(value))
 }
 
-func (m *MetricsApi) NewCounter(name string) Counter {
+func (m *MetricsApi) Counter(name string) Counter {
 	return m.reg.GetOrRegister(name, metrics.NewCounter()).(metrics.Counter)
 }
 
-func (m *MetricsApi) NewEWMA(name string, alpha float64) EWMA {
+func (m *MetricsApi) EWMA(name string, alpha float64) EWMA {
 	return m.reg.GetOrRegister(name, metrics.NewEWMA(alpha)).(metrics.EWMA)
 }
 
-func (m *MetricsApi) NewGauge(name string) Gauge {
+func (m *MetricsApi) Gauge(name string) Gauge {
 	return m.reg.GetOrRegister(name, metrics.NewGaugeFloat64()).(metrics.GaugeFloat64)
 }
 
-func (m *MetricsApi) NewFunctionalGauge(name string, f func() float64) Gauge {
+func (m *MetricsApi) GaugeFunctional(name string, f func() float64) Gauge {
 	return m.reg.GetOrRegister(name, metrics.NewFunctionalGaugeFloat64(f)).(metrics.GaugeFloat64)
 }
 
-func (m *MetricsApi) NewHistogram(name string, s Sample) Histogram {
+func (m *MetricsApi) Histogram(name string, s Sample) Histogram {
 	return m.reg.GetOrRegister(name, metrics.NewHistogram(s)).(metrics.Histogram)
 }
 
-func (m *MetricsApi) NewMeter(name string) Meter {
+func (m *MetricsApi) Meter(name string) Meter {
 	return m.reg.GetOrRegister(name, metrics.NewMeter()).(metrics.Meter)
 }
 
-func (m *MetricsApi) NewTimer(name string) Timer {
+func (m *MetricsApi) Timer(name string) Timer {
 	return m.reg.GetOrRegister(name, metrics.NewTimer()).(metrics.Timer)
 }
 
diff --git a/runtime/runenv_test.go b/runtime/runenv_test.go
index 002829e..8bc39a4 100644
--- a/runtime/runenv_test.go
+++ b/runtime/runenv_test.go
@@ -149,9 +149,9 @@ func TestMetricsRecordedInFile(t *testing.T) {
 			api.SetFrequency(200 * time.Millisecond)
 			api.RecordPoint("point1", 123)
 			api.RecordPoint("point2", 123)
-			api.NewCounter("counter1").Inc(50)
-			api.NewMeter("meter1").Mark(50)
-			api.NewTimer("timer1").Update(5 * time.Second)
+			api.Counter("counter1").Inc(50)
+			api.Meter("meter1").Mark(50)
+			api.Timer("timer1").Update(5 * time.Second)
 
 			time.Sleep(1 * time.Second)
 
@@ -222,8 +222,8 @@ func TestDiagnosticsDispatchedToInfluxDB(t *testing.T) {
 	tc.RUnlock()
 
 	re.D().SetFrequency(500 * time.Millisecond)
-	re.D().NewCounter("counter").Inc(100)
-	re.D().NewHistogram("histogram1", re.D().NewUniformSample(100)).Update(123)
+	re.D().Counter("counter").Inc(100)
+	re.D().Histogram("histogram1", re.D().NewUniformSample(100)).Update(123)
 
 	time.Sleep(1500 * time.Millisecond)
 
@@ -256,8 +256,8 @@ func TestResultsDispatchedOnClose(t *testing.T) {
 	tc.RUnlock()
 
 	re.R().SetFrequency(500 * time.Millisecond)
-	re.R().NewCounter("counter").Inc(100)
-	re.R().NewHistogram("histogram1", re.D().NewUniformSample(100)).Update(123)
+	re.R().Counter("counter").Inc(100)
+	re.R().Histogram("histogram1", re.D().NewUniformSample(100)).Update(123)
 
 	time.Sleep(1500 * time.Millisecond)
 
@@ -282,7 +282,7 @@ func TestFrequencyChange(t *testing.T) {
 
 	// set an abnormally high frequency to verify that no points are produced.
 	re.D().SetFrequency(24 * time.Hour)
-	counter := re.D().NewCounter("foo")
+	counter := re.D().Counter("foo")
 	counter.Inc(100)
 
 	require := require.New(t)

From e00149ed04ff62524e65012224d807ef988e8928 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= <raul@protocol.ai>
Date: Thu, 30 Apr 2020 11:49:26 +0100
Subject: [PATCH 13/13] rename GaugeFunctional to GaugeF.

---
 runtime/metrics_api.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/metrics_api.go b/runtime/metrics_api.go
index c6eb955..f01ac09 100644
--- a/runtime/metrics_api.go
+++ b/runtime/metrics_api.go
@@ -156,7 +156,7 @@ func (m *MetricsApi) Gauge(name string) Gauge {
 	return m.reg.GetOrRegister(name, metrics.NewGaugeFloat64()).(metrics.GaugeFloat64)
 }
 
-func (m *MetricsApi) GaugeFunctional(name string, f func() float64) Gauge {
+func (m *MetricsApi) GaugeF(name string, f func() float64) Gauge {
 	return m.reg.GetOrRegister(name, metrics.NewFunctionalGaugeFloat64(f)).(metrics.GaugeFloat64)
 }