Skip to content

Commit

Permalink
fix tests for hfh>=0.14
Browse files Browse the repository at this point in the history
  • Loading branch information
testbot committed Apr 24, 2023
1 parent becb31e commit 0c51b8e
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pyarrow as pa
import pytest
import requests
from packaging import version

from datasets import config
Expand Down Expand Up @@ -322,9 +323,9 @@ def offline(mode=OfflineSimulationMode.CONNECTION_FAILS, timeout=1e-16):
HF_DATASETS_OFFLINE_SET_TO_1: the HF_DATASETS_OFFLINE environment variable is set to 1.
This makes the http/ftp calls of the library instantly fail and raise an OfflineModeIsEnabled error.
"""
from requests import request as online_request
online_request = requests.Session().request

def timeout_request(method, url, **kwargs):
def timeout_request(session, method, url, **kwargs):
# Change the url to an invalid url so that the connection hangs
invalid_url = "https://10.255.255.1"
if kwargs.get("timeout") is None:
Expand All @@ -342,18 +343,16 @@ def timeout_request(method, url, **kwargs):
e.args = (max_retry_error,)
raise

def offline_socket(*args, **kwargs):
raise OSError("Offline mode is enabled.")
def raise_connection_error(session, prepared_request, **kwargs):
raise requests.ConnectionError("Offline mode is enabled.", request=prepared_request)

if mode is OfflineSimulationMode.CONNECTION_FAILS:
# inspired from https://stackoverflow.com/a/18601897
with patch("socket.socket", offline_socket):
with patch("requests.Session.send", raise_connection_error):
yield
elif mode is OfflineSimulationMode.CONNECTION_TIMES_OUT:
# inspired from https://stackoverflow.com/a/904609
with patch("requests.request", timeout_request):
with patch("requests.api.request", timeout_request):
yield
with patch("requests.Session.request", timeout_request):
yield
elif mode is OfflineSimulationMode.HF_DATASETS_OFFLINE_SET_TO_1:
with patch("datasets.config.HF_DATASETS_OFFLINE", True):
yield
Expand Down

1 comment on commit 0c51b8e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==8.0.0

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.006831 / 0.011353 (-0.004522) 0.004524 / 0.011008 (-0.006485) 0.098529 / 0.038508 (0.060020) 0.028282 / 0.023109 (0.005172) 0.392040 / 0.275898 (0.116142) 0.428786 / 0.323480 (0.105306) 0.005120 / 0.007986 (-0.002866) 0.004804 / 0.004328 (0.000476) 0.077324 / 0.004250 (0.073073) 0.037536 / 0.037052 (0.000484) 0.386258 / 0.258489 (0.127769) 0.435536 / 0.293841 (0.141695) 0.032036 / 0.128546 (-0.096510) 0.011712 / 0.075646 (-0.063934) 0.325242 / 0.419271 (-0.094029) 0.042622 / 0.043533 (-0.000911) 0.397115 / 0.255139 (0.141976) 0.414618 / 0.283200 (0.131418) 0.092133 / 0.141683 (-0.049550) 1.475698 / 1.452155 (0.023543) 1.533100 / 1.492716 (0.040384)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.189567 / 0.018006 (0.171560) 0.397510 / 0.000490 (0.397021) 0.008084 / 0.000200 (0.007884) 0.000196 / 0.000054 (0.000141)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.023024 / 0.037411 (-0.014387) 0.095893 / 0.014526 (0.081367) 0.103347 / 0.176557 (-0.073209) 0.165152 / 0.737135 (-0.571983) 0.106592 / 0.296338 (-0.189746)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.417434 / 0.215209 (0.202225) 4.180761 / 2.077655 (2.103107) 1.866981 / 1.504120 (0.362861) 1.666866 / 1.541195 (0.125672) 1.682408 / 1.468490 (0.213918) 0.694791 / 4.584777 (-3.889986) 3.419826 / 3.745712 (-0.325886) 1.874776 / 5.269862 (-3.395085) 1.157164 / 4.565676 (-3.408513) 0.083126 / 0.424275 (-0.341149) 0.012357 / 0.007607 (0.004750) 0.520824 / 0.226044 (0.294780) 5.211923 / 2.268929 (2.942995) 2.300738 / 55.444624 (-53.143886) 1.968365 / 6.876477 (-4.908111) 2.068246 / 2.142072 (-0.073826) 0.805141 / 4.805227 (-4.000086) 0.153600 / 6.500664 (-6.347064) 0.067549 / 0.075469 (-0.007920)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.214334 / 1.841788 (-0.627454) 13.705229 / 8.074308 (5.630921) 14.163908 / 10.191392 (3.972516) 0.129262 / 0.680424 (-0.551162) 0.016726 / 0.534201 (-0.517475) 0.382790 / 0.579283 (-0.196493) 0.390972 / 0.434364 (-0.043392) 0.449816 / 0.540337 (-0.090522) 0.541920 / 1.386936 (-0.845016)
PyArrow==latest
Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.006586 / 0.011353 (-0.004767) 0.004683 / 0.011008 (-0.006325) 0.076679 / 0.038508 (0.038170) 0.028199 / 0.023109 (0.005090) 0.343301 / 0.275898 (0.067403) 0.375393 / 0.323480 (0.051913) 0.005004 / 0.007986 (-0.002982) 0.003371 / 0.004328 (-0.000957) 0.075778 / 0.004250 (0.071527) 0.036891 / 0.037052 (-0.000161) 0.343745 / 0.258489 (0.085256) 0.381595 / 0.293841 (0.087754) 0.032583 / 0.128546 (-0.095963) 0.011716 / 0.075646 (-0.063930) 0.084942 / 0.419271 (-0.334330) 0.042774 / 0.043533 (-0.000759) 0.342546 / 0.255139 (0.087407) 0.367376 / 0.283200 (0.084176) 0.096075 / 0.141683 (-0.045608) 1.502563 / 1.452155 (0.050408) 1.564811 / 1.492716 (0.072095)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.228758 / 0.018006 (0.210751) 0.396806 / 0.000490 (0.396316) 0.003078 / 0.000200 (0.002878) 0.000070 / 0.000054 (0.000016)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.025499 / 0.037411 (-0.011912) 0.100392 / 0.014526 (0.085866) 0.106599 / 0.176557 (-0.069958) 0.158279 / 0.737135 (-0.578856) 0.110487 / 0.296338 (-0.185851)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.440076 / 0.215209 (0.224867) 4.405048 / 2.077655 (2.327394) 2.063675 / 1.504120 (0.559555) 1.858243 / 1.541195 (0.317049) 1.875009 / 1.468490 (0.406518) 0.698328 / 4.584777 (-3.886449) 3.396462 / 3.745712 (-0.349250) 1.837206 / 5.269862 (-3.432655) 1.147884 / 4.565676 (-3.417792) 0.082744 / 0.424275 (-0.341531) 0.012369 / 0.007607 (0.004762) 0.541332 / 0.226044 (0.315287) 5.444219 / 2.268929 (3.175291) 2.508741 / 55.444624 (-52.935883) 2.151977 / 6.876477 (-4.724500) 2.215746 / 2.142072 (0.073673) 0.805148 / 4.805227 (-4.000080) 0.151284 / 6.500664 (-6.349380) 0.067571 / 0.075469 (-0.007899)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.305786 / 1.841788 (-0.536001) 14.170384 / 8.074308 (6.096075) 13.946874 / 10.191392 (3.755482) 0.152721 / 0.680424 (-0.527703) 0.016745 / 0.534201 (-0.517456) 0.383553 / 0.579283 (-0.195730) 0.390495 / 0.434364 (-0.043869) 0.445024 / 0.540337 (-0.095313) 0.535229 / 1.386936 (-0.851707)

Please sign in to comment.