Skip to content

Commit

Permalink
Use zeroes instead of whitespaces as padding bytes (#899)
Browse files Browse the repository at this point in the history
  • Loading branch information
drawlerr authored Feb 18, 2020
1 parent fdeb3d5 commit d0bee8f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 30 deletions.
10 changes: 10 additions & 0 deletions docs/migrate.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Migration Guide
===============

Migrating to Rally 1.4.1
------------------------

Document IDs are now padded with 0 instead of spaces
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When Rally 1.4.1 generates document IDs, it pads them to a width of ten characters with zeroes instead of spaces, e.g. '0000000000' instead of '         0' and '0000000010' instead of '        10'.
Elasticsearch has optimizations for numeric IDs, so observed performance in Elasticsearch should improve slightly.


Migrating to Rally 1.4.0
------------------------

Expand Down
2 changes: 1 addition & 1 deletion esrally/track/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ def build_conflicting_ids(conflicts, docs_to_index, offset, shuffle=random.shuff
all_ids = [0] * docs_to_index
for i in range(docs_to_index):
# always consider the offset as each client will index its own range and we don't want uncontrolled conflicts across clients
all_ids[i] = "%10d" % (offset + i)
all_ids[i] = "%010d" % (offset + i)
if conflicts == IndexIdConflict.RandomConflicts:
shuffle(all_ids)
return all_ids
Expand Down
54 changes: 25 additions & 29 deletions tests/track/params_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,34 +88,34 @@ def test_no_id_conflicts(self):
def test_sequential_conflicts(self):
self.assertEqual(
[
" 0",
" 1",
" 2",
" 3",
" 4",
" 5",
" 6",
" 7",
" 8",
" 9",
" 10",
'0000000000',
'0000000001',
'0000000002',
'0000000003',
'0000000004',
'0000000005',
'0000000006',
'0000000007',
'0000000008',
'0000000009',
'0000000010'
],
params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 11, 0)
)

self.assertEqual(
[
" 5",
" 6",
" 7",
" 8",
" 9",
" 10",
" 11",
" 12",
" 13",
" 14",
" 15",
'0000000005',
'0000000006',
'0000000007',
'0000000008',
'0000000009',
'0000000010',
'0000000011',
'0000000012',
'0000000013',
'0000000014',
'0000000015'
],
params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 11, 5)
)
Expand All @@ -125,18 +125,14 @@ def test_random_conflicts(self):

self.assertEqual(
[
" 2",
" 1",
" 0"
'0000000002', '0000000001', '0000000000'
],
params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 0, shuffle=predictable_shuffle)
)

self.assertEqual(
[
" 7",
" 6",
" 5"
'0000000007', '0000000006', '0000000005'
],
params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 5, shuffle=predictable_shuffle)
)
Expand Down Expand Up @@ -670,7 +666,7 @@ def number_of_bulks(corpora, partition_index, total_partitions, bulk_size):

def test_build_conflicting_ids(self):
self.assertIsNone(params.build_conflicting_ids(params.IndexIdConflict.NoConflicts, 3, 0))
self.assertEqual([" 0", " 1", " 2"],
self.assertEqual(["0000000000", "0000000001", "0000000002"],
params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 3, 0))
# we cannot tell anything specific about the contents...
self.assertEqual(3, len(params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 0)))
Expand Down

0 comments on commit d0bee8f

Please sign in to comment.