From ee357ec45f36b3fa58d805005be62bb2b3d71c1b Mon Sep 17 00:00:00 2001
From: Romain Beaumont
Date: Thu, 10 Feb 2022 03:12:22 +0100
Subject: [PATCH] retry whole sharding if it fails

trying to work around #137
---
 img2dataset/reader.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/img2dataset/reader.py b/img2dataset/reader.py
index 77d19b1..6b64982 100644
--- a/img2dataset/reader.py
+++ b/img2dataset/reader.py
@@ -109,11 +109,20 @@ def write_shard(shard_id):
                     else:
                         raise e
 
-        shards = []
-        # thread pool to make it faster to write files to low latency file systems (ie s3, hdfs)
-        with ThreadPool(32) as thread_pool:
-            for shard in thread_pool.imap_unordered(write_shard, range(number_shards)):
-                shards.append(shard)
+        for i in range(10):
+            shards = []
+            # thread pool to make it faster to write files to low latency file systems (ie s3, hdfs)
+            try:
+                with ThreadPool(32) as thread_pool:
+                    for shard in thread_pool.imap_unordered(write_shard, range(number_shards)):
+                        shards.append(shard)
+                break
+            except Exception as e:  # pylint: disable=broad-except
+                if i != 9:
+                    print("retrying whole sharding to write to files due to error:", e)
+                    time.sleep(2 * i)
+                else:
+                    raise e
 
         shards.sort(key=lambda k: k[0])
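
For reference, the retry logic this patch wraps around the sharding step boils down to the pattern below. This is a minimal standalone sketch, not the exact reader.py code: write_all_shards is a hypothetical helper, and write_shard / number_shards stand in for the corresponding names in img2dataset/reader.py.

    # Retry the whole sharding step up to 10 times with a growing sleep,
    # instead of failing on the first error (the behavior introduced by this patch).
    import time
    from multiprocessing.pool import ThreadPool


    def write_all_shards(write_shard, number_shards, attempts=10):
        for i in range(attempts):
            shards = []
            try:
                # thread pool to make it faster to write files to low latency file systems (ie s3, hdfs)
                with ThreadPool(32) as thread_pool:
                    for shard in thread_pool.imap_unordered(write_shard, range(number_shards)):
                        shards.append(shard)
                return shards  # success: stop retrying
            except Exception as e:  # pylint: disable=broad-except
                if i != attempts - 1:
                    print("retrying whole sharding to write to files due to error:", e)
                    time.sleep(2 * i)  # linear backoff: 0s, 2s, 4s, ...
                else:
                    raise e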