# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Estimate the cumulative reward of random episodes on benchmarks.

This script estimates the cumulative reward for a random episode on a benchmark
by running trials. A trial is an episode in which a random number of random
actions are performed and the total cumulative reward is recorded.

Example Usage
-------------

Evaluate the impact on LLVM codesize of random actions on the cBench-crc32
benchmark:

    $ python -m sensitivity_analysis.benchmark_sensitivity_analysis \
        --env=llvm-v0 --reward=IrInstructionCountO3 \
        --benchmark=cbench-v1/crc32 --num_benchmark_sensitivity_trials=25

Evaluate the LLVM codesize episode reward on all benchmarks:

    $ python -m sensitivity_analysis.benchmark_sensitivity_analysis \
        --env=llvm-v0 --reward=IrInstructionCountO3
"""
import random
from concurrent.futures import ThreadPoolExecutor
from itertools import islice
from pathlib import Path
from typing import List, Optional, Union

import numpy as np
from absl import app, flags
from sensitivity_analysis.sensitivity_analysis_eval import (
    SensitivityAnalysisResult,
    run_sensitivity_analysis,
)

import compiler_gym.util.flags.nproc  # noqa
from compiler_gym.envs import CompilerEnv
from compiler_gym.service.proto import Benchmark
from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags
from compiler_gym.util.flags.env_from_flags import env_from_flags
from compiler_gym.util.runfiles_path import create_user_logs_dir
from compiler_gym.util.timer import Timer

flags.DEFINE_integer(
"num_benchmark_sensitivity_trials",
100,
"The number of trials to perform when estimating the episode reward of each benchmark. "
"A trial is a random episode of a benchmark. Increasing this number increases the "
"number of trials performed, leading to a higher fidelity estimate of the reward "
"potential for a benchmark.",
)

flags.DEFINE_integer(
"min_steps",
10,
"The minimum number of random steps to make in a single trial.",
)

flags.DEFINE_integer(
"max_steps",
100,
"The maximum number of random steps to make in a single trial.",
)

flags.DEFINE_integer(
"max_benchmark_attempts_multiplier",
5,
"A trial may fail because the environment crashes, or an action produces an invalid state. "
"Limit the total number of trials performed for each action to "
"max_benchmark_attempts_multiplier * num_trials.",
)

FLAGS = flags.FLAGS


def get_rewards(
benchmark: Union[Benchmark, str],
reward_space: str,
num_trials: int,
min_steps: int,
max_steps: int,
max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
"""Run random trials to get a list of num_trials episode rewards."""
rewards, runtimes = [], []
num_attempts = 0
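    # Run trials until num_trials rewards have been collected, giving up after
    # max_attempts_multiplier * num_trials attempts.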
while (
num_attempts < max_attempts_multiplier * num_trials
and len(rewards) < num_trials
):
num_attempts += 1
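        # Use a fresh environment for every trial so that a crash in one trial
        # does not affect subsequent trials.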
with env_from_flags(benchmark=benchmark) as env:
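            # Disable default observation and reward computation; the reward
            # space is selected explicitly in run_one_trial().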
env.observation_space = None
env.reward_space = None
env.reset(benchmark=benchmark)
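            # Reuse the resolved benchmark in subsequent trials rather than
            # re-resolving it from flags.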
benchmark = env.benchmark
with Timer() as t:
reward = run_one_trial(env, reward_space, min_steps, max_steps)
if reward is not None:
rewards.append(reward)
runtimes.append(t.time)
return SensitivityAnalysisResult(
name=env.benchmark, runtimes=np.array(runtimes), rewards=np.array(rewards)
    )


def run_one_trial(
env: CompilerEnv, reward_space: str, min_steps: int, max_steps: int
) -> Optional[float]:
"""Run a random number of random steps in an environment and return the
cumulative reward.
:return: A cumulative reward.
"""
num_steps = random.randint(min_steps, max_steps)
warmup_actions = [env.action_space.sample() for _ in range(num_steps)]
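    # Select the reward space so that episode_reward accumulates this reward
    # over the random walk.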
env.reward_space = reward_space
_, _, done, _ = env.multistep(warmup_actions)
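    # Discard the trial if the environment reached a terminal state early.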
if done:
return None
    return env.episode_reward


def run_benchmark_sensitivity_analysis(
benchmarks: List[Union[Benchmark, str]],
rewards_path: Path,
runtimes_path: Path,
reward: str,
num_trials: int,
min_steps: int,
max_steps: int,
nproc: int,
max_attempts_multiplier: int = 5,
):
"""Estimate the cumulative reward of random walks on a list of benchmarks."""
with ThreadPoolExecutor(max_workers=nproc) as executor:
analysis_futures = [
executor.submit(
get_rewards,
benchmark,
reward,
num_trials,
min_steps,
max_steps,
max_attempts_multiplier,
)
for benchmark in benchmarks
]
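    # Collect the per-benchmark results as the futures complete and record them
    # to the rewards/runtimes paths.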
return run_sensitivity_analysis(
analysis_futures=analysis_futures,
runtimes_path=runtimes_path,
rewards_path=rewards_path,
    )


def main(argv):
"""Main entry point."""
argv = FLAGS(argv)
if len(argv) != 1:
raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")
    # Determine the benchmark that is being analyzed, or fall back to a sample of benchmarks.
benchmark = benchmark_from_flags()
if benchmark:
benchmarks = [benchmark]
else:
with env_from_flags() as env:
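            # No benchmark was specified, so fall back to the first 100
            # benchmarks exposed by the environment.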
benchmarks = islice(env.benchmarks, 100)
logs_dir = Path(
FLAGS.output_dir or create_user_logs_dir("benchmark_sensitivity_analysis")
)
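    # Results are recorded to a pair of CSV files named after the reward space.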
rewards_path = logs_dir / f"benchmarks_{FLAGS.reward}.csv"
runtimes_path = logs_dir / f"benchmarks_{FLAGS.reward}_runtimes.csv"
run_benchmark_sensitivity_analysis(
rewards_path=rewards_path,
runtimes_path=runtimes_path,
benchmarks=benchmarks,
reward=FLAGS.reward,
num_trials=FLAGS.num_benchmark_sensitivity_trials,
min_steps=FLAGS.min_steps,
max_steps=FLAGS.max_steps,
nproc=FLAGS.nproc,
max_attempts_multiplier=FLAGS.max_benchmark_attempts_multiplier,
    )


if __name__ == "__main__":
app.run(main)