Skip to content

Commit

Permalink
Improve sentry performance with batching support (getsentry#1)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Krzysztof Płocharz committed Oct 26, 2018
1 parent 28d497f commit 2450998
Show file tree
Hide file tree
Showing 11 changed files with 260 additions and 414 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
)

# The version of sentry
VERSION = '9.0.0'
VERSION = '9.0.0-sf.2'

# Hack to prevent stupid "TypeError: 'NoneType' object is not callable" error
# in multiprocessing/util.py _exit_function when running `python
Expand Down
10 changes: 2 additions & 8 deletions src/sentry/buffer/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,12 @@ def process_pending(self, partition=None):
for key in keys:
pending_buffer.append(key)
if pending_buffer.full():
process_incr.apply_async(
kwargs={
'batch_keys': pending_buffer.flush(),
}
)
process_incr(batch_keys=pending_buffer.flush())
conn.target([host_id]).zrem(pending_key, *keys)

# queue up remainder of pending keys
if not pending_buffer.empty():
process_incr.apply_async(kwargs={
'batch_keys': pending_buffer.flush(),
})
process_incr(batch_keys=pending_buffer.flush())

metrics.timing('buffer.pending-size', keycount)
finally:
Expand Down
4 changes: 1 addition & 3 deletions src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,7 @@ def env(key, default='', type=None):
'django.contrib.staticfiles', 'crispy_forms', 'debug_toolbar',
'raven.contrib.django.raven_compat', 'rest_framework', 'sentry', 'sentry.analytics',
'sentry.analytics.events', 'sentry.nodestore', 'sentry.search', 'sentry.lang.java',
'sentry.lang.javascript', 'sentry.lang.native', 'sentry.plugins.sentry_interface_types',
'sentry.plugins.sentry_mail', 'sentry.plugins.sentry_urls', 'sentry.plugins.sentry_useragents',
'sentry.plugins.sentry_webhooks', 'social_auth', 'sudo', 'sentry.tagstore',
'sentry.lang.javascript', 'sentry.lang.native', 'social_auth', 'sudo', 'sentry.tagstore',
)

import django
Expand Down
30 changes: 20 additions & 10 deletions src/sentry/coreapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ def safely_load_json_string(self, json_string):
if isinstance(json_string, six.binary_type):
json_string = json_string.decode('utf-8')
obj = json.loads(json_string)
assert isinstance(obj, dict)
except Exception as e:
# This error should be caught as it suggests that there's a
# bug somewhere in the client's code.
Expand Down Expand Up @@ -348,17 +347,19 @@ def ensure_does_not_have_ip(self, data):
data['sdk'].pop('client_ip', None)

def insert_data_to_database(self, data, start_time=None, from_reprocessing=False):
task = from_reprocessing and \
preprocess_event_from_reprocessing or preprocess_event
if start_time is None:
start_time = time()
# we might be passed LazyData
if isinstance(data, LazyData):
data = dict(data.items())
cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
data = list(data)
if not isinstance(data, list):
data = [data]
first_item = data[0]
cache_key = 'e:{1}:{0}'.format(first_item['project'], first_item['event_id'])
default_cache.set(cache_key, data, timeout=3600)
task = from_reprocessing and \
preprocess_event_from_reprocessing or preprocess_event
task.delay(cache_key=cache_key, start_time=start_time,
event_id=data['event_id'])
task.delay(cache_key=cache_key, start_time=start_time, event_id=None)


class MinidumpApiHelper(ClientApiHelper):
Expand Down Expand Up @@ -494,6 +495,17 @@ def _decode(self):
# version of the data

# mutates data
processed_data = []
if isinstance(data, list):
for item in data:
processed_data.append(self._process_item(auth, item, helper, project))
else:
processed_data = [self._process_item(auth, data, helper, project)]

self._data = processed_data
self._decoded = True

def _process_item(self, auth, data, helper, project):
data = helper.validate_data(data)

data['project'] = self._project.id
Expand All @@ -506,9 +518,7 @@ def _decode(self):
'client_ip': self._client_ip,
'auth': self._auth,
})

self._data = data
self._decoded = True
return data

def __getitem__(self, name):
if not self._decoded:
Expand Down
10 changes: 2 additions & 8 deletions src/sentry/db/models/fields/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def to_python(self, value):
elif not value:
value = {}

return NodeData(self, None, value)
if 'node_id' in value:
node_id = value.pop('node_id')
data = None
Expand All @@ -155,14 +156,7 @@ def get_prep_value(self, value):
# save ourselves some storage
return None

# TODO(dcramer): we should probably do this more intelligently
# and manually
if not value.id:
value.id = nodestore.create(value.data)
else:
nodestore.set(value.id, value.data)

return compress(pickle.dumps({'node_id': value.id}))
return compress(pickle.dumps(value.data))


if hasattr(models, 'SubfieldBase'):
Expand Down
Loading

0 comments on commit 2450998

Please sign in to comment.