-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsplit-large-text.escript
executable file
·84 lines (72 loc) · 2.48 KB
/
split-large-text.escript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env escript
%%% split-large-text.escript
%%%
%%% If you're parsing a large block of text (e.g. a few K or more)
%%% what's the more efficient method?
%%%
%%% This test is similar to path-split but deals with larger content.
%%%
%%% The test is to split an IO list consisting of several copies of a
%%% 10k chunk (read from 10k.txt), each followed by a delimiter.
%%%
%%% These are representative of the results on my laptop (Erlang 18)
%%%
%%% re_string: 349.630 us (2860.17 per second)
%%% re_binary: 60.838 us (16437.10 per second)
%%% re_binary_compiled: 59.579 us (16784.44 per second)
%%% binary_split: 157.960 us (6330.72 per second)
%%%
%%% In the case of these larger files, the regular expression
%%% outperforms binary split, provided a binary is returned rather
%%% than a list.
%%%
%%% It's interesting that compiling the pattern provides a negligible
%%% improvement.
%%%
%% Run the script as compiled code rather than interpreted.
-mode(compile).
%% NOTE(review): bench/3, used by the test_* functions, is not defined in
%% this file; presumably it is provided by this header -- confirm.
-include("bench.hrl").
%% File whose contents form a single chunk of the benchmark input.
-define(CHUNK_FILE, "10k.txt").
%% Delimiter placed after every chunk and used as the split pattern.
-define(DELIMITER, <<"\n\n">>).
%% Number of copies of the chunk in the benchmark input.
-define(CHUNKS, 5).
%% Trial count passed to bench/3 for each benchmark.
-define(TRIALS, 1000).
%% Script entry point; the command-line arguments are ignored.
%%
%% Reads ?CHUNK_FILE, builds an iolist holding ?CHUNKS copies of its
%% contents (each copy followed by ?DELIMITER) and runs the four split
%% benchmarks against that same input. Crashes with badmatch if the file
%% cannot be read.
main(_Args) ->
    {ok, Chunk} = file:read_file(?CHUNK_FILE),
    Input = [[Chunk, ?DELIMITER] || _ <- lists:seq(1, ?CHUNKS)],
    %% The input ends with a delimiter, so each split is expected to yield
    %% one extra (empty) trailing part. This assumes the chunk file itself
    %% does not contain the delimiter; the split_* helpers assert the count.
    PartCount = ?CHUNKS + 1,
    test_re_string(Input, ?DELIMITER, PartCount),
    test_re_binary(Input, ?DELIMITER, PartCount),
    test_re_binary_compiled(Input, ?DELIMITER, PartCount),
    test_binary_split(Input, ?DELIMITER, PartCount).
%% Benchmark wrapper for split_re_string/3.
%%
%% Hands the label "re_string", a zero-arity fun performing one split and
%% ?TRIALS to bench/3. bench/3 is defined outside this file; presumably it
%% times the fun over that many runs -- confirm.
test_re_string(Str, Delim, Expected) ->
    Trial = fun() -> split_re_string(Str, Delim, Expected) end,
    bench("re_string", Trial, ?TRIALS).
%% Splits Str on the pattern Delim with re:split/3, returning the parts as
%% lists, and asserts that exactly Expected parts were produced.
%%
%% Returns Expected; crashes with badmatch when the part count differs.
split_re_string(Str, Delim, Expected) ->
    Expected = length(re:split(Str, Delim, [{return, list}])).
%% Benchmark wrapper for split_re_binary/3.
%%
%% Hands the label "re_binary", a zero-arity fun performing one split and
%% ?TRIALS to bench/3. bench/3 is defined outside this file; presumably it
%% times the fun over that many runs -- confirm.
test_re_binary(Str, Delim, Expected) ->
    Trial = fun() -> split_re_binary(Str, Delim, Expected) end,
    bench("re_binary", Trial, ?TRIALS).
%% Splits Str on the pattern Delim with re:split/3, returning the parts as
%% binaries, and asserts that exactly Expected parts were produced.
%%
%% Returns Expected; crashes with badmatch when the part count differs.
split_re_binary(Str, Delim, Expected) ->
    Expected = length(re:split(Str, Delim, [{return, binary}])).
%% Benchmark wrapper for split_re_binary_compiled/3.
%%
%% The pattern is compiled once, before the fun is built, so the fun given
%% to bench/3 only performs the split. Hands the label
%% "re_binary_compiled", that fun and ?TRIALS to bench/3 (defined outside
%% this file; presumably it times the fun over that many runs -- confirm).
test_re_binary_compiled(Str, Delim, Expected) ->
    {ok, Compiled} = re:compile(Delim),
    Trial = fun() -> split_re_binary_compiled(Str, Compiled, Expected) end,
    bench("re_binary_compiled", Trial, ?TRIALS).
%% Splits Str with re:split/3 using the already-compiled pattern Compiled,
%% returning the parts as binaries, and asserts that exactly Expected parts
%% were produced.
%%
%% Returns Expected; crashes with badmatch when the part count differs.
split_re_binary_compiled(Str, Compiled, Expected) ->
    Expected = length(re:split(Str, Compiled, [{return, binary}])).
%% Benchmark wrapper for split_binary/3.
%%
%% Hands the label "binary_split", a zero-arity fun performing one split
%% and ?TRIALS to bench/3. bench/3 is defined outside this file; presumably
%% it times the fun over that many runs -- confirm.
test_binary_split(Str, Delim, Expected) ->
    Trial = fun() -> split_binary(Str, Delim, Expected) end,
    bench("binary_split", Trial, ?TRIALS).
%% Flattens the iolist Str into a single binary, splits it on every
%% occurrence of Delim with binary:split/3 and asserts that exactly
%% Expected parts were produced.
%%
%% The iolist_to_binary/1 conversion happens inside this function, so it is
%% part of whatever the caller measures.
%% Returns Expected; crashes with badmatch when the part count differs.
split_binary(Str, Delim, Expected) ->
    Flat = iolist_to_binary(Str),
    Expected = length(binary:split(Flat, Delim, [global])).