From d0bee8f118f0f438391c4d4b78599077a6ae4811 Mon Sep 17 00:00:00 2001
From: Dennis Lawler <4824647+drawlerr@users.noreply.github.com>
Date: Tue, 18 Feb 2020 11:02:54 -0800
Subject: [PATCH] Use zeroes instead of whitespaces as padding bytes (#899)

---
 docs/migrate.rst           | 10 +++++++
 esrally/track/params.py    |  2 +-
 tests/track/params_test.py | 54 ++++++++++++++++++--------------------
 3 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/docs/migrate.rst b/docs/migrate.rst
index eeffdf93a..7e8de6fc3 100644
--- a/docs/migrate.rst
+++ b/docs/migrate.rst
@@ -1,6 +1,16 @@
 Migration Guide
 ===============
 
+Migrating to Rally 1.4.1
+------------------------
+
+Document IDs are now padded with 0 instead of spaces
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When Rally 1.4.1 generates document IDs, it pads them with '0' instead of ' ', e.g. '0000000000' instead of '         0'.
+Elasticsearch has optimizations for numeric IDs, so observed performance in Elasticsearch should improve slightly.
+
+
 Migrating to Rally 1.4.0
 ------------------------
 
diff --git a/esrally/track/params.py b/esrally/track/params.py
index 4d9f7a118..08618630e 100644
--- a/esrally/track/params.py
+++ b/esrally/track/params.py
@@ -622,7 +622,7 @@ def build_conflicting_ids(conflicts, docs_to_index, offset, shuffle=random.shuff
     all_ids = [0] * docs_to_index
     for i in range(docs_to_index):
         # always consider the offset as each client will index its own range and we don't want uncontrolled conflicts across clients
-        all_ids[i] = "%10d" % (offset + i)
+        all_ids[i] = "%010d" % (offset + i)
     if conflicts == IndexIdConflict.RandomConflicts:
         shuffle(all_ids)
     return all_ids
diff --git a/tests/track/params_test.py b/tests/track/params_test.py
index 7ad57600d..f182c30b0 100644
--- a/tests/track/params_test.py
+++ b/tests/track/params_test.py
@@ -88,34 +88,34 @@ def test_no_id_conflicts(self):
     def test_sequential_conflicts(self):
         self.assertEqual(
             [
-                "         0",
-                "         1",
-                "         2",
-                "         3",
-                "         4",
-                "         5",
-                "         6",
-                "         7",
-                "         8",
-                "         9",
-                "        10",
+                '0000000000',
+                '0000000001',
+                '0000000002',
+                '0000000003',
+                '0000000004',
+                '0000000005',
+                '0000000006',
+                '0000000007',
+                '0000000008',
+                '0000000009',
+                '0000000010'
             ],
             params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 11, 0)
         )
 
         self.assertEqual(
             [
-                "         5",
-                "         6",
-                "         7",
-                "         8",
-                "         9",
-                "        10",
-                "        11",
-                "        12",
-                "        13",
-                "        14",
-                "        15",
+                '0000000005',
+                '0000000006',
+                '0000000007',
+                '0000000008',
+                '0000000009',
+                '0000000010',
+                '0000000011',
+                '0000000012',
+                '0000000013',
+                '0000000014',
+                '0000000015'
             ],
             params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 11, 5)
         )
@@ -125,18 +125,14 @@ def test_random_conflicts(self):
 
         self.assertEqual(
             [
-                "         2",
-                "         1",
-                "         0"
+                '0000000002', '0000000001', '0000000000'
             ],
             params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 0, shuffle=predictable_shuffle)
         )
 
         self.assertEqual(
             [
-                "         7",
-                "         6",
-                "         5"
+                '0000000007', '0000000006', '0000000005'
             ],
             params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 5, shuffle=predictable_shuffle)
         )
@@ -670,7 +666,7 @@ def number_of_bulks(corpora, partition_index, total_partitions, bulk_size):
 
     def test_build_conflicting_ids(self):
         self.assertIsNone(params.build_conflicting_ids(params.IndexIdConflict.NoConflicts, 3, 0))
-        self.assertEqual(["         0", "         1", "         2"],
+        self.assertEqual(["0000000000", "0000000001", "0000000002"],
                          params.build_conflicting_ids(params.IndexIdConflict.SequentialConflicts, 3, 0))
         # we cannot tell anything specific about the contents...
         self.assertEqual(3, len(params.build_conflicting_ids(params.IndexIdConflict.RandomConflicts, 3, 0)))