Skip to content

Commit

Permalink
toy_multisplit changes in multisplit test suite.
Browse files Browse the repository at this point in the history
I now confirm that toy_multisplit is producing identical
results to multisplit (with the right inputs).  I'm also
timing multisplit now, and... hoo boy!  toy_multisplit
can be like 5x faster!  I guess regular expressions are
slow.  Something to think about... hmm.
  • Loading branch information
larryhastings committed Sep 5, 2023
1 parent 78dc8d5 commit 475ab8a
Showing 1 changed file with 29 additions and 7 deletions.
36 changes: 29 additions & 7 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1030,16 +1030,30 @@ def toy_multisplit(s, separators):
* does not contain any substring that appears as an
element in the separators iterable.
In other words, if
a = toy_multisplit(s, seps)
b = [x for x in multisplit(s, seps, keep=ALTERNATING, separate=True) if x]
then "a == b" would be True. (The tests below check this
indirectly: both results are compared against the same expected list.)
This is my second revision of toy_multisplit,
adding support for bytes, and speeding things
up a great deal in the general case.
(toy_multisplit is usually faster than
toy_multisplit is usually faster than
toy_multisplit_original, and it's *way* faster
when there are a lot of separators, or exactly
one separator. It's occasionally a little
slower than toy_multisplit_original when
there are only a handful of separators.)
there are only a handful of separators.
And, actually, toy_multisplit is a lot faster
than the real multisplit! I guess that's the price
you pay for general-purpose code. (Though it
does make me think... a couple of specialized
versions of multisplit we dispatch to for the
most common use cases might speed things up
quite a bit!)
"""
if not isinstance(separators, (list, tuple)):
separators = [separators[i:i+1] for i in range(len(separators))]
Expand Down Expand Up @@ -1156,11 +1170,21 @@ def t(s, seps, expected):
print(f'toy_multisplit(s={s!r}, seps={seps!r}) -> {result!r}')
assert result == expected, f"toy_multisplit:\n result={result!r}\n!=\nexpected={expected!r}"

if want_prints: # pragma: no cover
start = time_perf_counter_ns()
result = [x for x in big.multisplit(s, seps, keep=big.ALTERNATING, separate=True) if x]
if want_prints: # pragma: no cover
end = time_perf_counter_ns()
multisplit_time = str(end - start)
print(f'multisplit(s={s!r}, seps={seps!r}, keep=ALTERNATING, separate=True) -> {result!r}')
assert result == expected, f"multisplit:\n result={result!r}\n!=\nexpected={expected!r}"

if want_prints: # pragma: no cover
max_length = max([len(original_time), len(str_time), len(bytes_time)])
print(f"original time: {original_time:>{max_length}}ns")
print(f" str time: {str_time:>{max_length}}ns")
print(f" bytes time: {bytes_time:>{max_length}}ns")
print(f" original time: {original_time:>{max_length}}ns")
print(f" str time: {str_time:>{max_length}}ns")
print(f" bytes time: {bytes_time:>{max_length}}ns")
print(f"multisplit time: {multisplit_time:>{max_length}}ns")
print()
print()

Expand All @@ -1175,8 +1199,6 @@ def t(s, seps, expected):
[' ', ' ', '\t', ' ', 'abc', ' ', 'de', ' ', ' ', 'fgh', ' ', '\n', '\t', 'ijk', ' ', ' ', ' ', ' ', 'lm', ' ', ' '])




def multisplit_tester(s, separators=None):
"""
s is the test string you want split.
Expand Down

0 comments on commit 475ab8a

Please sign in to comment.