Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize namespace and metric names. #747

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions datadog/dogstatsd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from datadog.dogstatsd.route import get_default_route
from datadog.dogstatsd.container import ContainerID
from datadog.util.compat import is_p3k, text
from datadog.util.format import normalize_tags
from datadog.util.format import normalize_tags, normalize_metric_name
from datadog.version import __version__

# Logging
Expand Down Expand Up @@ -318,7 +318,7 @@ def __init__(
constant_tags = []
self.constant_tags = constant_tags + env_tags
if namespace is not None:
namespace = text(namespace)
namespace = normalize_metric_name(text(namespace))
self.namespace = namespace
self.use_ms = use_ms
self.default_sample_rate = default_sample_rate
Expand Down Expand Up @@ -763,7 +763,7 @@ def _serialize_metric(self, metric, metric_type, value, tags, sample_rate=1):
# Create/format the metric packet
return "%s%s:%s|%s%s%s%s" % (
(self.namespace + ".") if self.namespace else "",
metric,
normalize_metric_name(metric),
value,
metric_type,
("|@" + text(sample_rate)) if sample_rate != 1 else "",
Expand Down
10 changes: 8 additions & 2 deletions datadog/util/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from datadog.util.compat import conditional_lru_cache

# Matches every character that is NOT allowed in a tag. Allowed characters are
# word characters plus "-", ":", "/" and ".". With re.UNICODE, \w already
# covers digits and "_", so the explicit \d and _ entries are redundant but
# harmless.
TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]", re.UNICODE)
# NOTE(review): TAG_INVALID_CHARS_SUBS and INVALID_CHARS_SUBS hold the same
# value; they appear to be the removed and added sides of one renamed
# constant in this diff view - confirm against the final file.
TAG_INVALID_CHARS_SUBS = "_"
# Matches every character that is NOT allowed in a metric name. Allowed
# characters are word characters and "."; unlike tags, "-", ":" and "/" are
# treated as invalid here.
METRIC_NAME_INVALID_CHARS_RE = re.compile(r"[^\w\d_\.]", re.UNICODE)
# Replacement string substituted for each invalid character.
INVALID_CHARS_SUBS = "_"


def pretty_json(obj):
Expand All @@ -33,10 +34,15 @@ def force_to_epoch_seconds(epoch_sec_or_dt):

# Cached worker behind normalize_tags: substitutes every character matched by
# TAG_INVALID_CHARS_RE in each tag and returns the results as a new list.
# normalize_tags passes the tags in as a tuple so the argument is hashable.
@conditional_lru_cache
def _normalize_tags_with_cache(tag_list):
# NOTE(review): the two return lines below differ only in the name of the
# substitution constant (TAG_INVALID_CHARS_SUBS vs INVALID_CHARS_SUBS); they
# appear to be the removed and added sides of a single diff line, not two
# consecutive statements - confirm against the final file.
return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
return [TAG_INVALID_CHARS_RE.sub(INVALID_CHARS_SUBS, tag) for tag in tag_list]


def normalize_tags(tag_list):
    """
    Return the normalized form of every tag in `tag_list`.

    The substitution itself is done by `_normalize_tags_with_cache`; this
    wrapper only adapts the argument so that it can be used as a cache key.
    """
    # The cache decorator needs a hashable argument, so freeze the incoming
    # iterable into an immutable tuple before handing it over.
    hashable_tags = tuple(tag_list)
    return _normalize_tags_with_cache(hashable_tags)


@conditional_lru_cache
def normalize_metric_name(metric_name):
    """
    Return `metric_name` with each character matched by
    METRIC_NAME_INVALID_CHARS_RE replaced by INVALID_CHARS_SUBS.
    """
    replace_invalid_chars = METRIC_NAME_INVALID_CHARS_RE.sub
    normalized_name = replace_invalid_chars(INVALID_CHARS_SUBS, metric_name)
    return normalized_name
4 changes: 4 additions & 0 deletions tests/unit/dogstatsd/test_statsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ def test_gauge(self):
self.statsd.gauge('gauge', 123.4)
self.assert_equal_telemetry('gauge:123.4|g\n', self.recv(2))

def test_gauge_with_unescaped_name(self):
    """
    A gauge whose name contains '|' and '#' is emitted with those
    characters replaced by '_'.
    """
    raw_name = 'my|new.p3rfect#_gauge'
    expected_packet = 'my_new.p3rfect__gauge:123.4|g\n'

    self.statsd.gauge(raw_name, 123.4)
    received = self.recv(2)
    self.assert_equal_telemetry(expected_packet, received)

def test_counter(self):
self.statsd.increment('page.views')
self.statsd.flush()
Expand Down
18 changes: 17 additions & 1 deletion tests/unit/util/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytest

from datadog.util.format import construct_url, normalize_tags
from datadog.util.format import construct_url, normalize_tags, normalize_metric_name


class TestConstructURL:
Expand Down Expand Up @@ -52,3 +52,19 @@ class TestNormalizeTags:
@pytest.mark.parametrize("original_tags,expected_tags", test_data)
def test_normalize_tags(self, original_tags, expected_tags):
    """Each parametrized tag list must normalize to its expected counterpart."""
    normalized = normalize_tags(original_tags)
    assert normalized == expected_tags

class TestNormalizeMetricName:
    """
    Checks for the `normalize_metric_name` helper of the format utilities
    """

    # (input metric name, expected normalized name) pairs.
    test_data = [
        ('', ''),
        ('just a metric name', 'just_a_metric_name'),
        ('xyz.abc!@#$%^&*()0987654321{}}{', 'xyz.abc__________0987654321____'),
        ('xyz.abc_123', 'xyz.abc_123'),
        ('абśжż西アطر', 'абśжż西アطر'),
        ('a😃😃b', 'a__b'),
    ]

    @pytest.mark.parametrize("original_metric_name,expected_metric_name", test_data)
    def test_normalize_metric_name(self, original_metric_name, expected_metric_name):
        normalized_name = normalize_metric_name(original_metric_name)
        assert normalized_name == expected_metric_name