Skip to content

Commit

Permalink
retry whole sharding if it fails
Browse files: browse the repository at this point in the history
trying to work around #137
  • Loading branch information
rom1504 committed Feb 10, 2022
1 parent 8adcf09 commit ee357ec
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions img2dataset/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,20 @@ def write_shard(shard_id):
else:
raise e

shards = []
# thread pool to make it faster to write files to low latency file systems (ie s3, hdfs)
with ThreadPool(32) as thread_pool:
for shard in thread_pool.imap_unordered(write_shard, range(number_shards)):
shards.append(shard)
for i in range(10):
shards = []
# thread pool to make it faster to write files to low latency file systems (ie s3, hdfs)
try:
with ThreadPool(32) as thread_pool:
for shard in thread_pool.imap_unordered(write_shard, range(number_shards)):
shards.append(shard)
break
except Exception as e: # pylint: disable=broad-except
if i != 9:
print("retrying whole sharding to write to files due to error:", e)
time.sleep(2 * i)
else:
raise e

shards.sort(key=lambda k: k[0])

Expand Down

0 comments on commit ee357ec

Please sign in to comment.