Skip to content

Commit

Permalink
[dogstatsd] properly handle utf8 packets
Browse files Browse the repository at this point in the history
Fixes #1256. We should always assume that dogstatsd
receives a utf-8 encoded string through its socket,
but still support unicode python strings in case we
submit things programmatically (e.g. useful for tests).
  • Loading branch information
LeoCavaille committed Jan 9, 2015
1 parent 2bd69dc commit 1d35512
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 14 deletions.
10 changes: 9 additions & 1 deletion aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def parse_event_packet(self, packet):
# Event syntax:
# _e{5,4}:title|body|meta
name = name_and_metadata[0]
metadata = unicode(name_and_metadata[1])
metadata = name_and_metadata[1]
title_length, text_length = name.split(',')
title_length = int(title_length[3:])
text_length = int(text_length[:-1])
Expand Down Expand Up @@ -498,6 +498,14 @@ def parse_event_packet(self, packet):
raise Exception(u'Unparseable event packet: %s' % packet)

def submit_packets(self, packets):
# Normally we should decode the string as utf-8,
# because `packets` was received through a
# network socket, but if submit_packets is used
# programmatically and packets is already unicode,
# then do not decode!
if not isinstance(packets, unicode):
packets = packets.decode('utf-8', 'replace')

for packet in packets.splitlines():

if not packet.strip():
Expand Down
28 changes: 15 additions & 13 deletions tests/test_dogstatsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,37 +601,39 @@ def test_event_title(self):
stats = MetricsAggregator('myhost')
stats.submit_packets('_e{0,4}:|text')
stats.submit_packets(u'_e{9,4}:2intitulé|text')
stats.submit_packets(u'_e{9,4}:2intitulé|text'.encode('utf-8')) # comes from socket
stats.submit_packets('_e{14,4}:3title content|text')
stats.submit_packets('_e{14,4}:4title|content|text')
stats.submit_packets('_e{13,4}:5title\\ntitle|text') # \n stays escaped

events = self.sort_events(stats.flush_events())

assert len(events) == 5
first, second, third, fourth, fifth = events
assert len(events) == 6

nt.assert_equal(first['msg_title'], '')
nt.assert_equal(second['msg_title'], u'2intitulé')
nt.assert_equal(third['msg_title'], '3title content')
nt.assert_equal(fourth['msg_title'], '4title|content')
nt.assert_equal(fifth['msg_title'], '5title\\ntitle')
nt.assert_equal(events[0]['msg_title'], '')
nt.assert_equal(events[1]['msg_title'], u'2intitulé')
nt.assert_equal(events[2]['msg_title'], u'2intitulé')
nt.assert_equal(events[3]['msg_title'], '3title content')
nt.assert_equal(events[4]['msg_title'], '4title|content')
nt.assert_equal(events[5]['msg_title'], '5title\\ntitle')

def test_event_text(self):
stats = MetricsAggregator('myhost')
stats.submit_packets('_e{2,0}:t1|')
stats.submit_packets('_e{2,12}:t2|text|content')
stats.submit_packets('_e{2,23}:t3|First line\\nSecond line') # \n is a newline
stats.submit_packets(u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪') # utf-8 compliant
stats.submit_packets(u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪'.encode('utf-8')) # utf-8 compliant

events = self.sort_events(stats.flush_events())

assert len(events) == 4
first, second, third, fourth = events
assert len(events) == 5

nt.assert_equal(first['msg_text'], '')
nt.assert_equal(second['msg_text'], 'text|content')
nt.assert_equal(third['msg_text'], 'First line\nSecond line')
nt.assert_equal(fourth['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')
nt.assert_equal(events[0]['msg_text'], '')
nt.assert_equal(events[1]['msg_text'], 'text|content')
nt.assert_equal(events[2]['msg_text'], 'First line\nSecond line')
nt.assert_equal(events[3]['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')
nt.assert_equal(events[4]['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')

def test_recent_point_threshold(self):
threshold = 100
Expand Down

0 comments on commit 1d35512

Please sign in to comment.