Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus metrics #2981

Merged
merged 1 commit into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions charts/otv-backend/templates/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{ if and .Values.serviceMonitor.enabled ( ne .Values.environment "ci" ) }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ .Release.Name }}
labels:
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
spec:
selector:
matchLabels:
app: {{ .Release.Name }}
endpoints:
- port: backend
{{ end }}
2 changes: 1 addition & 1 deletion charts/otv-backend/templates/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
labels:
app: {{ .Release.Name }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
spec:
containers:
Expand Down
6 changes: 6 additions & 0 deletions charts/otv-backend/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ storageSize: 20Gi
# memory: 400Mi
resources: {}

serviceMonitor:
enabled: true
labels:
group: w3f
release: prometheus-operator

secret: |
{
"matrix": {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
"mongodb-memory-server": "^9.1.7",
"mongoose": "^8.2.1",
"prettier": "^3.2.4",
"prom-client": "^15.1.2",
"semver": "^7.6.0",
"swagger-jsdoc": "^6.2.8",
"swagger2": "^4.0.3",
Expand Down
8 changes: 7 additions & 1 deletion packages/common/src/ApiHandler/ApiHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import EventEmitter from "eventemitter3";

import logger from "../logger";
import { API_PROVIDER_TIMEOUT, POLKADOT_API_TIMEOUT } from "../constants";
import { setChainRpcConnectivity } from "../metrics";

export const apiLabel = { label: "ApiHandler" };

Expand All @@ -20,9 +21,12 @@ class ApiHandler extends EventEmitter {

public upSince = -1;
public isConnected = false;
public chain: string;

constructor(endpoints: string[]) {
// chain is a name for type of chain: relay, people. Used for Prometheus metrics.
constructor(chain: string, endpoints: string[]) {
super();
this.chain = chain;
this.endpoints = endpoints.sort(() => Math.random() - 0.5);
}

Expand All @@ -47,6 +51,7 @@ class ApiHandler extends EventEmitter {
}

this.isConnected = false;
setChainRpcConnectivity(this.chain, false);
this.connectionAttempt = new Promise(async (resolve) => {
try {
await this.wsProvider.connect();
Expand All @@ -63,6 +68,7 @@ class ApiHandler extends EventEmitter {
this.connectionAttempt = null;
this.upSince = Date.now();
this.isConnected = true;
setChainRpcConnectivity(this.chain, true);
logger.info(`Connected to ${this.currentEndpoint()}`, apiLabel);
resolve();
} catch (err) {
Expand Down
4 changes: 3 additions & 1 deletion packages/common/src/ApiHandler/__mocks__/ApiHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ const createMockApiPromise = (): any => ({
class ApiHandlerMock extends EventEmitter {
private endpoints: string[];
private api: ApiPromise = createMockApiPromise as unknown as ApiPromise;
public chain: string;

constructor(endpoints: string[]) {
constructor(chain: string, endpoints: string[]) {
super();
this.chain = chain;
// Initialize with mock data or behavior as needed
this.endpoints = endpoints.sort(() => Math.random() - 0.5);
this.api = createMockApiPromise();
Expand Down
9 changes: 9 additions & 0 deletions packages/common/src/__mocks__/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { vi } from "vitest";

export const setupMetrics = vi.fn();
export const registerBlockScan = vi.fn();
export const registerNomination = vi.fn();
export const setDbConnectivity = vi.fn();
export const setTelemetryConnectivity = vi.fn();
export const setChainRpcConnectivity = vi.fn();
export const renderMetrics = vi.fn(() => Promise.resolve(""));
1 change: 1 addition & 0 deletions packages/common/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export type ConfigSchema = {
kusamaBootstrapEndpoint: string;
polkadotBootstrapEndpoint: string;
candidatesUrl: string;
prometheusPrefix?: string;
};
matrix: {
accessToken: string;
Expand Down
3 changes: 3 additions & 0 deletions packages/common/src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import mongoose from "mongoose";
import logger from "../logger";

import * as queries from "./queries";
import { setDbConnectivity } from "../metrics";

// [name, client, version, null, networkId]
export type NodeDetails = [
Expand Down Expand Up @@ -37,6 +38,8 @@ export class Db {
resolve(true);
});

mongoose.connection.on("open", () => setDbConnectivity(true));
mongoose.connection.on("disconnected", () => setDbConnectivity(false));
mongoose.connection.on("error", (err) => {
logger.error(`MongoDB connection issue: ${err}`, dbLabel);
reject(err);
Expand Down
2 changes: 2 additions & 0 deletions packages/common/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import * as Models from "./db/models";
import ScoreKeeper from "./scorekeeper/scorekeeper";
import * as Jobs from "./scorekeeper/jobs/specificJobs";
import MatrixBot from "./matrix";
import * as metrics from "./metrics";

export {
ApiHandler,
Expand All @@ -31,4 +32,5 @@ export {
ScoreKeeper,
Jobs,
MatrixBot,
metrics,
};
166 changes: 166 additions & 0 deletions packages/common/src/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import promClient from "prom-client";

import { ConfigSchema } from "./config";
import { jobStatusEmitter } from "./Events";
import { JobStatus } from "./scorekeeper/jobs/JobsClass";
import logger from "./logger";

export const metricsLabel = { label: "Metrics" };

export type Metrics = {
counters: {
nominations: promClient.Counter;
blocksScanned: promClient.Counter;
jobRuns: promClient.Counter;
};
gauges: {
dbConnectivity: promClient.Gauge;
telemetryConnectivity: promClient.Gauge;
chainRpcConnectivity: promClient.Gauge;
jobProgress: promClient.Gauge;
latestBlock: promClient.Gauge;
};
histograms: {
jobExecutionTime: promClient.Histogram;
};
};

let metrics: Metrics | null = null;

export function setupMetrics(config: ConfigSchema): void {
const prefix = config.global.prometheusPrefix ?? "otv_backend_";
metrics = {
counters: {
blocksScanned: new promClient.Counter({
name: `${prefix}blocks_scanned_total`,
help: "amount of blocks scanned",
}),
jobRuns: new promClient.Counter({
name: `${prefix}jobs_runs_total`,
help: "job events by name and status",
labelNames: ["name", "status"],
}),
nominations: new promClient.Counter({
name: `${prefix}nominations_total`,
help: "nominations count by account",
labelNames: ["account"],
}),
},
gauges: {
dbConnectivity: new promClient.Gauge({
name: `${prefix}db_connectivity`,
help: "database connection status; 0 or 1",
}),
telemetryConnectivity: new promClient.Gauge({
name: `${prefix}telemetry_connectivity`,
help: "telemetry connection status; 0 or 1",
}),
chainRpcConnectivity: new promClient.Gauge({
name: `${prefix}chain_rpc_connectivity`,
help: "chain RPC connection status by chain (relay, people); 0 or 1",
labelNames: ["chain"],
}),
jobProgress: new promClient.Gauge({
name: `${prefix}job_progress`,
help: "job progress by name, in percents from 0 to 100",
labelNames: ["name"],
}),
latestBlock: new promClient.Gauge({
name: `${prefix}latest_block`,
help: "latest scanned block number",
}),
},
histograms: {
jobExecutionTime: new promClient.Histogram({
name: `${prefix}job_execution_time_seconds`,
help: "job timings; only successful jobs are counted",
labelNames: ["name"],
buckets: growingBuckets(0, 10, 30),
}),
},
};

promClient.collectDefaultMetrics({ prefix });
followJobs(metrics);
}

function getMetrics(): Metrics {
if (metrics === null) {
throw new Error("getMetrics() was called before setupMetrics()");
}

return metrics;
}

export function registerBlockScan(blockNumber: number): void {
const metrics = getMetrics();
metrics.counters.blocksScanned.inc();
metrics.gauges.latestBlock.set(blockNumber);
}

export function registerNomination(account: string): void {
getMetrics().counters.nominations.inc({ account });
}

export function setDbConnectivity(isConnected: boolean): void {
getMetrics().gauges.dbConnectivity.set(isConnected ? 1 : 0);
}

export function setTelemetryConnectivity(isConnected: boolean): void {
getMetrics().gauges.telemetryConnectivity.set(isConnected ? 1 : 0);
}

export function setChainRpcConnectivity(
chain: string,
isConnected: boolean,
): void {
getMetrics().gauges.chainRpcConnectivity.set({ chain }, isConnected ? 1 : 0);
}

function followJobs(metrics: Metrics): void {
const updateJob = (data: JobStatus) => {
metrics.counters.jobRuns.inc({ name: data.name, status: data.status });
};
jobStatusEmitter.on("jobStarted", updateJob);
jobStatusEmitter.on("jobRunning", updateJob);
jobStatusEmitter.on("jobErrored", updateJob);
jobStatusEmitter.on("jobFinished", (data: JobStatus) => {
updateJob(data);
if (!data.executedAt) {
logger.error(
`${data.name}: jobFinished event was emitted, but jobStatus.executedAt is empty`,
metricsLabel,
);
return;
}
const duration = Math.ceil((Date.now() - data.executedAt) / 1000);
metrics.histograms.jobExecutionTime.observe({ name: data.name }, duration);
});

jobStatusEmitter.on("jobProgress", (data: JobStatus) => {
metrics.gauges.jobProgress.set({ name: data.name }, data.progress);
});
}

// prom-client has two types of bucket generation: linear, which would create
// too much buckets for our case, and exponential, which would either create too
// little of buckets, or buckets with floating points.
// This creates buckets like [10, 30, 60, 100, 150, 210, 280, 360, 450, ...]
function growingBuckets(
start: number,
factor: number,
count: number,
): number[] {
const buckets = [];

let currentBucket = start;
for (let i = 0; i < count; i++) {
currentBucket += factor * i;
buckets.push(currentBucket);
}
return buckets;
}

export async function renderMetrics(): Promise<string> {
return promClient.register.metrics();
}
2 changes: 2 additions & 0 deletions packages/common/src/nominator/nominator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import EventEmitter from "eventemitter3";
import { sendProxyDelayTx, sendProxyTx } from "./NominatorTx";
import { getNominatorChainInfo } from "./NominatorChainInfo";
import { NominatorState, NominatorStatus } from "../types";
import { registerNomination } from "../metrics";

export const nominatorLabel = { label: "Nominator" };

Expand Down Expand Up @@ -354,6 +355,7 @@ export default class Nominator extends EventEmitter {
tx = api.tx.staking.nominate(targets);
await this.sendStakingTx(tx, targets);
}
registerNomination(await this.stash());
await queries.setLastNominatedEraIndex(currentEra);
return true;
} catch (e) {
Expand Down
1 change: 1 addition & 0 deletions packages/common/src/scorekeeper/jobs/JobsClass.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export type JobConfig = {
export interface JobStatus {
name: string;
updated: number;
executedAt?: number;
enabled?: boolean;
runCount?: number;
status: string;
Expand Down
4 changes: 4 additions & 0 deletions packages/common/src/scorekeeper/jobs/cron/SetupCronJob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,14 @@ export const setupCronJob = async (
}

isRunning = true;
const executedAt = Date.now();
logger.info(`Executing ${name}.`, loggerLabel);
const runningStatus: JobStatus = {
status: "running",
name: name,
runCount: jobRunCount,
updated: Date.now(),
executedAt,
};
jobStatusEmitter.emit("jobRunning", runningStatus);

Expand All @@ -58,6 +60,7 @@ export const setupCronJob = async (
runCount: jobRunCount,
updated: Date.now(),
error: JSON.stringify(e),
executedAt,
};

jobStatusEmitter.emit("jobErrored", errorStatus);
Expand All @@ -69,6 +72,7 @@ export const setupCronJob = async (
name: name,
runCount: jobRunCount,
updated: Date.now(),
executedAt,
};
jobStatusEmitter.emit("jobFinished", finishedStatus);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { Block, EventRecord, Phase } from "@polkadot/types/interfaces";
import type { FrameSystemEventRecord } from "@polkadot/types/lookup";
import { Exposure } from "../../../chaindata/queries/ValidatorPref";
import { JobNames } from "../JobConfigs";
import { registerBlockScan } from "../../../metrics";

export const blockdataLabel = { label: "Block" };

Expand Down Expand Up @@ -200,6 +201,7 @@ export const processBlock = async (
await queries.setBlockIndex(blockNumber, blockIndex?.latest);
}
const end = Date.now();
registerBlockScan(blockNumber);
logger.info(
`Done processing block #${blockNumber} (${(end - start) / 1000}s)`,
blockdataLabel,
Expand Down
2 changes: 1 addition & 1 deletion packages/common/test/ApiHandler/ApiHandler.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ describe("ApiHandler Integration Tests", () => {
let handler: ApiHandler;

beforeAll(async () => {
handler = new ApiHandler(KusamaEndpoints);
handler = new ApiHandler("relay", KusamaEndpoints);
}, TIMEOUT_DURATION);

it(
Expand Down
Loading