From ac74006fffe8b3084d69dd93478be6417e737bd4 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Thu, 28 Mar 2024 13:27:21 -0700 Subject: [PATCH] [receiver/oracledb] Fix incorrect values for a couple of metrics (#32028) **Description:** Values were being scraped incorrectly for the metrics `oracledb.tablespace_size.limit` and `oracledb.tablespace_size.usage`. This changes these metrics to be scraped from the [`DBA_TABLESPACE_USAGE_METRICS`](https://docs.oracle.com/en/database/oracle/oracle-database/19/refrn/DBA_TABLESPACE_USAGE_METRICS.html#GUID-FE479528-BB37-4B55-92CF-9EC19EDF4F46) table. This results in a slight loss of granularity in these metrics, as values will always be in multiples of the respective tablespace's block size, but I think the clarity and simplicity are worth the trade-off. Note: The value of the usage metric was generally close to the expected value, but the limit was being calculated as potential theoretical capacity, unbounded by server capacity. For example, in testing in a docker container on my local machine, limit was set to **17TB**. This doesn't line up with user expectations. **Link to tracking Issue:** Fixes https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/31451 **Testing:** Updated existing tests, added a couple of new ones. Also, the original issue filed was comparing `DBA_TABLESPACE_USAGE_METRICS` output for percent used to what we got from `usage/limit * 100`. Here are the local testing outputs, compared to show they now line up. ``` 2024-03-27T16:31:57.938-0700 info oracledbreceiver/scraper.go:285 DBA_TABLESPACE_USAGE_METRICS: Tablespace name: SYSTEM, used space: 111288, tablespace size: 3518587, percent used: 3.16286054600895188892586711654422641816 {"kind": "receiver", "name": "oracledb", "data_type": "metrics"} ``` ``` Metric #20 Descriptor: -> Name: oracledb.tablespace_size.usage -> Description: Used tablespace in bytes. 
-> Unit: By -> DataType: Gauge NumberDataPoints #0 Data point attributes: -> tablespace_name: Str(SYSTEM) StartTimestamp: 2024-03-27 23:31:56.873576 +0000 UTC Timestamp: 2024-03-27 23:32:12.523295 +0000 UTC Value: 911671296 ``` ``` Metric #19 Descriptor: -> Name: oracledb.tablespace_size.limit -> Description: Maximum size of tablespace in bytes, -1 if unlimited. -> Unit: By -> DataType: Gauge NumberDataPoints #0 Data point attributes: -> tablespace_name: Str(SYSTEM) StartTimestamp: 2024-03-27 23:31:56.873576 +0000 UTC Timestamp: 2024-03-27 23:32:12.523295 +0000 UTC Value: 28824264704 ``` Doing the same calculation, we get: ``` (911671296 / 28824264704) * 100 = ~3.16% ``` --- .chloggen/oracledb_tablespace_fix.yaml | 27 +++++++++ .../internal/metadata/generated_metrics.go | 7 +-- .../metadata/generated_metrics_test.go | 2 +- receiver/oracledbreceiver/metadata.yaml | 1 - receiver/oracledbreceiver/scraper.go | 58 +++++++++++-------- receiver/oracledbreceiver/scraper_test.go | 31 +++++++--- 6 files changed, 85 insertions(+), 41 deletions(-) create mode 100644 .chloggen/oracledb_tablespace_fix.yaml diff --git a/.chloggen/oracledb_tablespace_fix.yaml b/.chloggen/oracledb_tablespace_fix.yaml new file mode 100644 index 000000000000..14e1a0b68959 --- /dev/null +++ b/.chloggen/oracledb_tablespace_fix.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: oracledbreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix incorrect values being set for oracledb.tablespace_size.limit and oracledb.tablespace_size.usage + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. 
+issues: [31451] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/receiver/oracledbreceiver/internal/metadata/generated_metrics.go b/receiver/oracledbreceiver/internal/metadata/generated_metrics.go index 593cec51498d..2cd119b5f9b9 100644 --- a/receiver/oracledbreceiver/internal/metadata/generated_metrics.go +++ b/receiver/oracledbreceiver/internal/metadata/generated_metrics.go @@ -1768,13 +1768,8 @@ func (mb *MetricsBuilder) RecordOracledbTablespaceSizeLimitDataPoint(ts pcommon. } // RecordOracledbTablespaceSizeUsageDataPoint adds a data point to oracledb.tablespace_size.usage metric. -func (mb *MetricsBuilder) RecordOracledbTablespaceSizeUsageDataPoint(ts pcommon.Timestamp, inputVal string, tablespaceNameAttributeValue string) error { - val, err := strconv.ParseInt(inputVal, 10, 64) - if err != nil { - return fmt.Errorf("failed to parse int64 for OracledbTablespaceSizeUsage, value was %s: %w", inputVal, err) - } +func (mb *MetricsBuilder) RecordOracledbTablespaceSizeUsageDataPoint(ts pcommon.Timestamp, val int64, tablespaceNameAttributeValue string) { mb.metricOracledbTablespaceSizeUsage.recordDataPoint(mb.startTime, ts, val, tablespaceNameAttributeValue) - return nil } // RecordOracledbTransactionsLimitDataPoint adds a data point to oracledb.transactions.limit metric. 
diff --git a/receiver/oracledbreceiver/internal/metadata/generated_metrics_test.go b/receiver/oracledbreceiver/internal/metadata/generated_metrics_test.go index 1a5789f9e6c2..2827dd3ab22c 100644 --- a/receiver/oracledbreceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/oracledbreceiver/internal/metadata/generated_metrics_test.go @@ -143,7 +143,7 @@ func TestMetricsBuilder(t *testing.T) { defaultMetricsCount++ allMetricsCount++ - mb.RecordOracledbTablespaceSizeUsageDataPoint(ts, "1", "tablespace_name-val") + mb.RecordOracledbTablespaceSizeUsageDataPoint(ts, 1, "tablespace_name-val") defaultMetricsCount++ allMetricsCount++ diff --git a/receiver/oracledbreceiver/metadata.yaml b/receiver/oracledbreceiver/metadata.yaml index 43965e4df223..401d6d02e06b 100644 --- a/receiver/oracledbreceiver/metadata.yaml +++ b/receiver/oracledbreceiver/metadata.yaml @@ -233,7 +233,6 @@ metrics: enabled: true gauge: value_type: int - input_type: string unit: By oracledb.db_block_gets: description: Number of times a current block was requested from the buffer cache. 
diff --git a/receiver/oracledbreceiver/scraper.go b/receiver/oracledbreceiver/scraper.go index 9da8f711e657..5016e0c94cbe 100644 --- a/receiver/oracledbreceiver/scraper.go +++ b/receiver/oracledbreceiver/scraper.go @@ -38,8 +38,10 @@ const ( consistentGets = "consistent gets" sessionCountSQL = "select status, type, count(*) as VALUE FROM v$session GROUP BY status, type" systemResourceLimitsSQL = "select RESOURCE_NAME, CURRENT_UTILIZATION, LIMIT_VALUE, CASE WHEN TRIM(INITIAL_ALLOCATION) LIKE 'UNLIMITED' THEN '-1' ELSE TRIM(INITIAL_ALLOCATION) END as INITIAL_ALLOCATION, CASE WHEN TRIM(LIMIT_VALUE) LIKE 'UNLIMITED' THEN '-1' ELSE TRIM(LIMIT_VALUE) END as LIMIT_VALUE from v$resource_limit" - tablespaceUsageSQL = "select TABLESPACE_NAME, BYTES from DBA_DATA_FILES" - tablespaceMaxSpaceSQL = "select TABLESPACE_NAME, (BLOCK_SIZE*MAX_EXTENTS) AS VALUE FROM DBA_TABLESPACES" + tablespaceUsageSQL = ` + select um.TABLESPACE_NAME, um.USED_SPACE, um.TABLESPACE_SIZE, ts.BLOCK_SIZE + FROM DBA_TABLESPACE_USAGE_METRICS um INNER JOIN DBA_TABLESPACES ts + ON um.TABLESPACE_NAME = ts.TABLESPACE_NAME` ) type dbProviderFunc func() (*sql.DB, error) @@ -48,7 +50,6 @@ type clientProviderFunc func(*sql.DB, string, *zap.Logger) dbClient type scraper struct { statsClient dbClient - tablespaceMaxSpaceClient dbClient tablespaceUsageClient dbClient systemResourceLimitsClient dbClient sessionCountClient dbClient @@ -88,7 +89,6 @@ func (s *scraper) start(context.Context, component.Host) error { s.sessionCountClient = s.clientProviderFunc(s.db, sessionCountSQL, s.logger) s.systemResourceLimitsClient = s.clientProviderFunc(s.db, systemResourceLimitsSQL, s.logger) s.tablespaceUsageClient = s.clientProviderFunc(s.db, tablespaceUsageSQL, s.logger) - s.tablespaceMaxSpaceClient = s.clientProviderFunc(s.db, tablespaceMaxSpaceSQL, s.logger) return nil } @@ -274,7 +274,9 @@ func (s *scraper) scrape(ctx context.Context) (pmetric.Metrics, error) { } } } - if 
s.metricsBuilderConfig.Metrics.OracledbTablespaceSizeUsage.Enabled { + + if s.metricsBuilderConfig.Metrics.OracledbTablespaceSizeUsage.Enabled || + s.metricsBuilderConfig.Metrics.OracledbTablespaceSizeLimit.Enabled { rows, err := s.tablespaceUsageClient.metricRows(ctx) if err != nil { scrapeErrors = append(scrapeErrors, fmt.Errorf("error executing %s: %w", tablespaceUsageSQL, err)) @@ -282,33 +284,39 @@ func (s *scraper) scrape(ctx context.Context) (pmetric.Metrics, error) { now := pcommon.NewTimestampFromTime(time.Now()) for _, row := range rows { tablespaceName := row["TABLESPACE_NAME"] - err := s.mb.RecordOracledbTablespaceSizeUsageDataPoint(now, row["BYTES"], tablespaceName) + usedSpaceBlockCount, err := strconv.ParseInt(row["USED_SPACE"], 10, 64) if err != nil { - scrapeErrors = append(scrapeErrors, err) + scrapeErrors = append(scrapeErrors, fmt.Errorf("failed to parse int64 for OracledbTablespaceSizeUsage, value was %s: %w", row["USED_SPACE"], err)) + continue } - } - } - } - if s.metricsBuilderConfig.Metrics.OracledbTablespaceSizeLimit.Enabled { - rows, err := s.tablespaceMaxSpaceClient.metricRows(ctx) - if err != nil { - scrapeErrors = append(scrapeErrors, fmt.Errorf("error executing %s: %w", tablespaceMaxSpaceSQL, err)) - } else { - now := pcommon.NewTimestampFromTime(time.Now()) - for _, row := range rows { - tablespaceName := row["TABLESPACE_NAME"] - var val int64 - inputVal := row["VALUE"] - if inputVal == "" { - val = -1 + + tablespaceSizeOriginal := row["TABLESPACE_SIZE"] + var tablespaceSizeBlockCount int64 + // Tablespace size should never be empty using the DBA_TABLESPACE_USAGE_METRICS query. 
This logic is done + // to preserve backward compatibility with the original metric gathered from querying DBA_TABLESPACES + if tablespaceSizeOriginal == "" { + tablespaceSizeBlockCount = -1 } else { - val, err = strconv.ParseInt(inputVal, 10, 64) + tablespaceSizeBlockCount, err = strconv.ParseInt(tablespaceSizeOriginal, 10, 64) if err != nil { - scrapeErrors = append(scrapeErrors, fmt.Errorf("failed to parse int64 for OracledbTablespaceSizeLimit, value was %s: %w", inputVal, err)) + scrapeErrors = append(scrapeErrors, fmt.Errorf("failed to parse int64 for OracledbTablespaceSizeLimit, value was %s: %w", tablespaceSizeOriginal, err)) continue } } - s.mb.RecordOracledbTablespaceSizeLimitDataPoint(now, val, tablespaceName) + + blockSize, err := strconv.ParseInt(row["BLOCK_SIZE"], 10, 64) + if err != nil { + scrapeErrors = append(scrapeErrors, fmt.Errorf("failed to parse int64 for OracledbBlockSize, value was %s: %w", row["BLOCK_SIZE"], err)) + continue + } + + s.mb.RecordOracledbTablespaceSizeUsageDataPoint(now, usedSpaceBlockCount*blockSize, tablespaceName) + + if tablespaceSizeBlockCount < 0 { + s.mb.RecordOracledbTablespaceSizeLimitDataPoint(now, -1, tablespaceName) + } else { + s.mb.RecordOracledbTablespaceSizeLimitDataPoint(now, tablespaceSizeBlockCount*blockSize, tablespaceName) + } } } } diff --git a/receiver/oracledbreceiver/scraper_test.go b/receiver/oracledbreceiver/scraper_test.go index 5a31ab7b0e18..c1ad547e6555 100644 --- a/receiver/oracledbreceiver/scraper_test.go +++ b/receiver/oracledbreceiver/scraper_test.go @@ -36,8 +36,7 @@ var queryResponses = map[string][]metricRow{ sessionCountSQL: {{"VALUE": "1"}}, systemResourceLimitsSQL: {{"RESOURCE_NAME": "processes", "CURRENT_UTILIZATION": "3", "MAX_UTILIZATION": "10", "INITIAL_ALLOCATION": "100", "LIMIT_VALUE": "100"}, {"RESOURCE_NAME": "locks", "CURRENT_UTILIZATION": "3", "MAX_UTILIZATION": "10", "INITIAL_ALLOCATION": "-1", "LIMIT_VALUE": "-1"}}, - tablespaceUsageSQL: {{"TABLESPACE_NAME": "SYS", 
"BYTES": "1024"}}, - tablespaceMaxSpaceSQL: {{"TABLESPACE_NAME": "SYS", "VALUE": "1024"}}, + tablespaceUsageSQL: {{"TABLESPACE_NAME": "SYS", "USED_SPACE": "111288", "TABLESPACE_SIZE": "3518587", "BLOCK_SIZE": "8192"}}, } func TestScraper_Scrape(t *testing.T) { @@ -76,11 +75,11 @@ func TestScraper_Scrape(t *testing.T) { { name: "no limit on tablespace", dbclientFn: func(_ *sql.DB, s string, _ *zap.Logger) dbClient { - if s == tablespaceMaxSpaceSQL { + if s == tablespaceUsageSQL { return &fakeDbClient{Responses: [][]metricRow{ { - {"TABLESPACE_NAME": "SYS", "VALUE": "1024"}, - {"TABLESPACE_NAME": "FOO", "VALUE": ""}, + {"TABLESPACE_NAME": "SYS", "TABLESPACE_SIZE": "1024", "USED_SPACE": "111288", "BLOCK_SIZE": "8192"}, + {"TABLESPACE_NAME": "FOO", "TABLESPACE_SIZE": "", "USED_SPACE": "111288", "BLOCK_SIZE": "8192"}, }, }} } @@ -92,11 +91,11 @@ func TestScraper_Scrape(t *testing.T) { { name: "bad value on tablespace", dbclientFn: func(_ *sql.DB, s string, _ *zap.Logger) dbClient { - if s == tablespaceMaxSpaceSQL { + if s == tablespaceUsageSQL { return &fakeDbClient{Responses: [][]metricRow{ { - {"TABLESPACE_NAME": "SYS", "VALUE": "1024"}, - {"TABLESPACE_NAME": "FOO", "VALUE": "ert"}, + {"TABLESPACE_NAME": "SYS", "TABLESPACE_SIZE": "1024", "USED_SPACE": "111288", "BLOCK_SIZE": "8192"}, + {"TABLESPACE_NAME": "FOO", "TABLESPACE_SIZE": "ert", "USED_SPACE": "111288", "BLOCK_SIZE": "8192"}, }, }} } @@ -106,6 +105,22 @@ func TestScraper_Scrape(t *testing.T) { }, errWanted: `failed to parse int64 for OracledbTablespaceSizeLimit, value was ert: strconv.ParseInt: parsing "ert": invalid syntax`, }, + { + name: "Empty block size", + dbclientFn: func(_ *sql.DB, s string, _ *zap.Logger) dbClient { + if s == tablespaceUsageSQL { + return &fakeDbClient{Responses: [][]metricRow{ + { + {"TABLESPACE_NAME": "SYS", "TABLESPACE_SIZE": "1024", "USED_SPACE": "111288", "BLOCK_SIZE": ""}, + }, + }} + } + return &fakeDbClient{Responses: [][]metricRow{ + queryResponses[s], + }} + }, + errWanted: 
`failed to parse int64 for OracledbBlockSize, value was : strconv.ParseInt: parsing "": invalid syntax`, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) {