-
Notifications
You must be signed in to change notification settings - Fork 298
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DAOS-8331 client: add client side metrics
1. Move TLS to common, so both client and server can have TLS, to which metrics can be attached. 2. Add object metrics on the client side, enabled by export DAOS_CLIENT_METRICS=1. Client metrics are organized as "root/jobid/pid/xxxxx". root/jobid/pid are stored in an independent shared memory segment, which will only be destroyed once all jobs are destroyed. During each daos thread initialization, another shmem (pid/xxx) is created, to which all metrics of the thread are attached. This shmem is destroyed once the thread exits; however, if DAOS_CLIENT_METRICS_RETAIN is set, these client metrics are retained and can be retrieved by daos_metrics --jobid. 3. Add DAOS_METRIC_DUMP_ENV to dump metrics from the current thread once it exits. 4. Some fixes in telemetry for conv_ptr when re-opening the shared memory. 5. Add the daos_metrics --jobid XXX option to retrieve all metrics of the job. Required-githooks: true Signed-off-by: Di Wang <di.wang@intel.com>
- Loading branch information
Di Wang
committed
Dec 18, 2023
1 parent
40dd690
commit 63a3f72
Showing
26 changed files
with
1,282 additions
and
459 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
/* | ||
* (C) Copyright 2020-2023 Intel Corporation. | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
||
#include <fcntl.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <sys/utsname.h> | ||
#include <daos/common.h> | ||
#include <daos/job.h> | ||
#include <daos/tls.h> | ||
#include <gurt/telemetry_common.h> | ||
#include <gurt/telemetry_consumer.h> | ||
#include <gurt/telemetry_producer.h> | ||
|
||
/* Count fed to MAX_IDS_SIZE() when sizing the regions handed to d_tm_init() */
#define INIT_JOB_NUM 1024

/* Loaded from DAOS_CLIENT_METRICS_ENV by dc_tm_init(); gates all client metrics work */
bool client_metric;
/* Loaded from DAOS_CLIENT_METRICS_RETAIN_ENV by dc_tm_init() */
bool client_metric_retain;

/*
 * Scale a count by D_TM_METRIC_SIZE. The argument is parenthesised so that
 * an expression such as MAX_IDS_SIZE(a + b) multiplies the whole sum.
 */
#define MAX_IDS_SIZE(num) ((num) * D_TM_METRIC_SIZE)
/* The client side metrics structure looks like | ||
* root/job_id/pid/.... | ||
*/ | ||
int | ||
dc_tm_init(void) | ||
{ | ||
struct d_tm_node_t *job_node; | ||
struct d_tm_context *current_ctx; | ||
struct daos_thread_local_storage *dtls; | ||
int metrics_tag; | ||
pid_t pid; | ||
int rc; | ||
|
||
d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &client_metric); | ||
if (!client_metric) | ||
return 0; | ||
|
||
d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN_ENV, &client_metric_retain); | ||
|
||
metrics_tag = D_TM_CLIENT_PROCESS | D_TM_OPEN_OR_CREATE; | ||
if (client_metric_retain) | ||
metrics_tag |= D_TM_RETAIN_SHMEM; | ||
else | ||
metrics_tag |= D_TM_RETAIN_SHMEM_IF_NON_EMPTY; | ||
|
||
rc = d_tm_init(DC_TM_JOB_ROOT_ID, MAX_IDS_SIZE(INIT_JOB_NUM), metrics_tag); | ||
if (rc != 0) { | ||
D_ERROR("init job root id %u: %d\n", DC_TM_JOB_ROOT_ID, rc); | ||
return rc; | ||
} | ||
|
||
pid = getpid(); | ||
D_INFO("INIT %s/%u metrics\n", dc_jobid, pid); | ||
rc = d_tm_add_metric(&job_node, D_TM_DIRECTORY, | ||
"job id directory", "dir", | ||
"%s/%u", dc_jobid, pid); | ||
/* Close job root sheme */ | ||
d_tm_fini(); | ||
if (rc != 0) { | ||
D_ERROR("add metric %s/%u failed: %d\n", dc_jobid, pid, rc); | ||
D_GOTO(out, rc); | ||
} | ||
|
||
metrics_tag = D_TM_CLIENT_PROCESS; | ||
if (client_metric_retain) | ||
metrics_tag |= D_TM_RETAIN_SHMEM; | ||
rc = d_tm_init(pid, MAX_IDS_SIZE(INIT_JOB_NUM), metrics_tag); | ||
if (rc != 0) | ||
D_GOTO(out, rc); | ||
|
||
current_ctx = d_tm_open(pid); | ||
if (current_ctx == NULL) | ||
D_GOTO(out, rc = -DER_NOMEM); | ||
|
||
dtls = dc_tls_init(DAOS_CLI_TAG, pid); | ||
if (dtls == NULL) | ||
D_GOTO(out, rc = -DER_NOMEM); | ||
out: | ||
if (rc) | ||
d_tm_fini(); | ||
|
||
return rc; | ||
} | ||
|
||
/**
 * d_tm_iterate() callback used by dump_tm_file(): prints one node to the
 * stream passed through the opaque \a arg pointer.
 */
static void
iter_dump(struct d_tm_context *ctx, struct d_tm_node_t *node, int level,
	  char *path, int format, int opt_fields, void *arg)
{
	/* The iterator hands the destination stream back as an untyped pointer */
	FILE *stream = arg;

	d_tm_print_node(ctx, node, level, path, format, opt_fields, stream);
}
|
||
static int | ||
dump_tm_file(const char *dump_path) | ||
{ | ||
struct d_tm_context *ctx; | ||
struct d_tm_node_t *root; | ||
uint32_t filter; | ||
FILE *dump_file; | ||
pid_t pid; | ||
int rc = 0; | ||
|
||
dump_file = fopen(dump_path, "w+"); | ||
if (dump_file == NULL) { | ||
D_INFO("cannot open %s", dump_path); | ||
return -DER_INVAL; | ||
} | ||
|
||
filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | | ||
D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; | ||
|
||
pid = getpid(); | ||
ctx = d_tm_open(pid); | ||
if (ctx == NULL) | ||
D_GOTO(close, rc = -DER_NOMEM); | ||
|
||
root = d_tm_get_root(ctx); | ||
if (root == NULL) { | ||
D_INFO("no root exist for %u\n", pid); | ||
D_GOTO(close_ctx, rc = -DER_NONEXIST); | ||
} | ||
|
||
d_tm_print_field_descriptors(0, dump_file); | ||
|
||
d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_CSV, 0, iter_dump, dump_file); | ||
|
||
close_ctx: | ||
d_tm_close(&ctx); | ||
close: | ||
fclose(dump_file); | ||
return rc; | ||
} | ||
|
||
void | ||
dc_tm_fini() | ||
{ | ||
pid_t pid = getpid(); | ||
char *dump_path; | ||
int rc; | ||
|
||
if (!client_metric) | ||
return; | ||
|
||
dump_path = getenv(METRIC_DUMP_ENV); | ||
if (dump_path != NULL) | ||
dump_tm_file(dump_path); | ||
|
||
dc_tls_fini(); | ||
/* close current pid ctct */ | ||
d_tm_fini(); | ||
|
||
if (client_metric_retain) | ||
return; | ||
|
||
rc = d_tm_init(DC_TM_JOB_ROOT_ID, MAX_IDS_SIZE(INIT_JOB_NUM), | ||
D_TM_CLIENT_PROCESS | D_TM_RETAIN_SHMEM_IF_NON_EMPTY | | ||
D_TM_OPEN_OR_CREATE); | ||
if (rc != 0) | ||
return; | ||
|
||
D_INFO("delete pid %s/%u\n", dc_jobid, pid); | ||
d_tm_del_node("%s/%d", dc_jobid, pid); | ||
d_tm_del_node("%s", dc_jobid); | ||
|
||
d_tm_fini(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.