Skip to content

Commit

Permalink
[MetaSchedule] Modify Profiler Timers (#11735)
Browse files Browse the repository at this point in the history
Minor modifications to the scoped timers so that they cover 99% of the total time cost during MetaSchedule tuning. Also allow the time spent in `ApplyHistoryBest` and `TaskExtraction` to be counted during `tune_relay`.
  • Loading branch information
zxybazh authored Jun 15, 2022
1 parent d0cbde0 commit 6ce41be
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 79 deletions.
17 changes: 10 additions & 7 deletions python/tvm/meta_schedule/tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from .measure_callback import MeasureCallback
from .mutator import Mutator
from .postproc import Postproc
from .profiler import Profiler
from .runner import Runner
from .schedule_rule import ScheduleRule
from .search_strategy import EvolutionarySearch, ReplayFunc, ReplayTrace
Expand Down Expand Up @@ -563,7 +564,8 @@ def tune_relay(
target = default_config.target(target)
# pylint: enable=protected-access,
# parse the tuning contexts
extracted_tasks = extract_task_from_relay(mod, target, params)
with Profiler.timeit("TaskExtraction"):
extracted_tasks = extract_task_from_relay(mod, target, params)
database = tune_extracted_tasks(
extracted_tasks,
config,
Expand All @@ -579,9 +581,10 @@ def tune_relay(
mutator_probs=mutator_probs,
num_threads=num_threads,
)
with target, autotvm_silencer(), ApplyHistoryBest(database):
with PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
return relay_build(mod, target=target, params=params)
with Profiler.timeit("ApplyHistoryBest"):
with target, autotvm_silencer(), ApplyHistoryBest(database):
with PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
return relay_build(mod, target=target, params=params)
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/add_to_database.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class AddToDatabaseNode : public MeasureCallbackNode {
if (!task_scheduler->database.defined()) {
return;
}
auto _ = Profiler::TimedScope("AddToDatabase");
auto _ = Profiler::TimedScope("MeasureCallback/AddToDatabase");
TuneContext task = task_scheduler->tasks[task_id];
Database database = task_scheduler->database.value();
Workload workload = database->CommitWorkload(task->mod.value());
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/echo_statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ class EchoStatisticsNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
auto _ = Profiler::TimedScope("MeasureCallback/EchoStatistics");
if (this->task_info.empty()) {
SetupTaskInfo(task_scheduler->tasks);
}
auto _ = Profiler::TimedScope("EchoStatistics");
ICHECK_EQ(measure_candidates.size(), builder_results.size());
ICHECK_EQ(measure_candidates.size(), runner_results.size());
int n = measure_candidates.size();
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/measure_callback.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void PyMeasureCallbackNode::Apply(const TaskScheduler& task_scheduler,
const Array<BuilderResult>& builds, //
const Array<RunnerResult>& results) {
ICHECK(f_apply != nullptr) << "PyMeasureCallback's Apply method not implemented!";
auto _ = Profiler::TimedScope(this->f_as_string());
auto _ = Profiler::TimedScope("MeasureCallback/" + this->f_as_string());
return f_apply(task_scheduler, task_id, measure_candidates, builds, results);
}

Expand Down
3 changes: 2 additions & 1 deletion src/meta_schedule/measure_callback/remove_build_artifact.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ class RemoveBuildArtifactNode : public MeasureCallbackNode {
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
static const PackedFunc* f_rm = runtime::Registry::Get("meta_schedule.remove_build_dir");
auto _ = Profiler::TimedScope("RemoveBuildArtifact");
ICHECK(*f_rm != nullptr) << "The `remove_build_dir` func is not in tvm registry.";
auto _ = Profiler::TimedScope("MeasureCallback/RemoveBuildArtifact");
for (const BuilderResult& build_result : builder_results) {
if (Optional<String> path = build_result->artifact_path) {
(*f_rm)(path.value());
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/update_cost_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class UpdateCostModelNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
auto _ = Profiler::TimedScope("UpdateCostModel");
auto _ = Profiler::TimedScope("MeasureCallback/UpdateCostModel");
TuneContext task = task_scheduler->tasks[task_id];
ICHECK(task_scheduler->cost_model.defined())
<< "Cost model must be defined for the task scheduler!";
Expand Down
4 changes: 3 additions & 1 deletion src/meta_schedule/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ String ProfilerNode::Table() const {
p.Row() << i << table_entry[i].name << table_entry[i].minutes << table_entry[i].percentage;
}
}
p.Separator();
return p.AsStr();
}

Expand All @@ -79,7 +80,8 @@ PackedFunc ProfilerTimedScope(String name) {
tik = std::chrono::high_resolution_clock::now(), //
name = std::move(name)]() {
auto tok = std::chrono::high_resolution_clock::now();
double duration = std::chrono::duration_cast<std::chrono::seconds>(tok - tik).count();
double duration =
std::chrono::duration_cast<std::chrono::nanoseconds>(tok - tik).count() / 1e9;
profiler->stats_sec[name] += duration;
});
}
Expand Down
108 changes: 61 additions & 47 deletions src/meta_schedule/search_strategy/evolutionary_search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -499,41 +499,49 @@ std::vector<Schedule> EvolutionarySearchNode::State::SampleInitPopulation(int nu

std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
std::vector<Schedule> population, int num) {
ICHECK_GT(num, 0);
// The heap to record best schedule, we do not consider schedules that are already measured
IRModuleSet exists = this->measured_workloads_;
IRModuleSet exists;
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc/CopyMeasuredWorkloads");
ICHECK_GT(num, 0);
// The heap to record best schedule, we do not consider schedules that are already measured
exists = this->measured_workloads_;
}
SizedHeap heap(num);
for (int iter = 0;; ++iter) {
// Predict normalized score with the cost model,
std::vector<double> scores = PredictNormalizedScore(population, //
GetRef<TuneContext>(self->context_), //
this->cost_model_, //
this->args_info_);
ICHECK_EQ(scores.size(), population.size());
for (int i = 0, n = population.size(); i < n; ++i) {
Schedule sch = population.at(i);
IRModule mod = sch->mod();
size_t shash = StructuralHash()(mod);
double score = scores.at(i);
if (!exists.Has(mod, shash)) {
exists.Add(mod, shash);
heap.Push(sch, score);

{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
ICHECK_EQ(scores.size(), population.size());
for (int i = 0, n = population.size(); i < n; ++i) {
Schedule sch = population.at(i);
IRModule mod = sch->mod();
size_t shash = StructuralHash()(mod);
double score = scores.at(i);
if (!exists.Has(mod, shash)) {
exists.Add(mod, shash);
heap.Push(sch, score);
}
}
// Discontinue once it reaches end of search
if (iter == self->genetic_num_iters) {
break;
}
// Set threaded samplers, with probability from predicated normalized throughput
for (PerThreadData& data : this->per_thread_data_) {
data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
}
}
// Discontinue once it reaches end of search
if (iter == self->genetic_num_iters) {
break;
}
// Set threaded samplers, with probability from predicated normalized throughput
for (PerThreadData& data : this->per_thread_data_) {
data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
}
ThreadedTraceApply pp(self->context_->postprocs);
ConcurrentBitmask cbmask(self->population_size);
std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
// The worker function
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Mutation");
ThreadedTraceApply pp(self->context_->postprocs);
ConcurrentBitmask cbmask(self->population_size);
std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
// The worker function
auto f_find_candidate = [&cbmask, &population, &next_population, &pp, this](int thread_id,
int trace_id) {
// Prepare samplers
Expand Down Expand Up @@ -571,40 +579,46 @@ std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
};
support::parallel_for_dynamic(0, self->population_size, self->context_->num_threads,
f_find_candidate);

population.swap(next_population);
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Evolve iter #" << iter << " done. Summary:\n"
<< pp.SummarizeFailures();
}
population.swap(next_population);
TVM_PY_LOG(INFO, self->context_->logging_func) << "Evolve iter #" << iter << " done. Summary:\n"
<< pp.SummarizeFailures();
}
// Return the best states from the heap, sorting from higher score to lower ones
std::sort(heap.heap.begin(), heap.heap.end());
std::vector<Schedule> results;
results.reserve(num);
for (const SizedHeap::Item& item : heap.heap) {
results.push_back(item.sch);
}
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
std::sort(heap.heap.begin(), heap.heap.end());
std::vector<Schedule> results;
results.reserve(num);
for (const SizedHeap::Item& item : heap.heap) {
results.push_back(item.sch);
}

constexpr int kNumScoresPerLine = 16;
std::ostringstream os;
int n = heap.heap.size();
for (int st = 0; st < n; st += kNumScoresPerLine) {
os << std::endl;
int ed = std::min(st + kNumScoresPerLine, n);
os << "[" << (st + 1) << " : " << ed << "]:\t";
for (int i = st; i < ed; ++i) {
if (i != st) {
os << " ";
constexpr int kNumScoresPerLine = 16;
std::ostringstream os;
int n = heap.heap.size();
for (int st = 0; st < n; st += kNumScoresPerLine) {
os << std::endl;
int ed = std::min(st + kNumScoresPerLine, n);
os << "[" << (st + 1) << " : " << ed << "]:\t";
for (int i = st; i < ed; ++i) {
if (i != st) {
os << " ";
}
os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Scores of the best " << n << " candidates:" << os.str();
return results;
}
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Scores of the best " << n << " candidates:" << os.str();
return results;
}

std::vector<Schedule> EvolutionarySearchNode::State::PickWithEpsGreedy(
const std::vector<Schedule>& unmeasured, const std::vector<Schedule>& bests, int num) {
auto _ = Profiler::TimedScope("EvoSearch/PickWithEpsGreedy");
int num_rands = num * self->eps_greedy;
int num_bests = num - num_rands;
std::vector<int> rands =
Expand Down
1 change: 1 addition & 0 deletions src/meta_schedule/task_scheduler/task_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace tvm {
namespace meta_schedule {

void TaskSchedulerNode::InitializeTask(int task_id) {
auto _ = Profiler::TimedScope("InitializeTask");
TuneContext task = this->tasks[task_id];
TVM_PY_LOG(INFO, this->logging_func)
<< "Initializing Task #" << task_id << ": " << task->task_name;
Expand Down
33 changes: 14 additions & 19 deletions tests/python/unittest/test_meta_schedule_tune_relay.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,20 @@ def test_meta_schedule_tune_relay(
mod, params, (input_name, _, _) = get_network(name=model_name, input_shape=input_shape)
target = Target(target)
with tempfile.TemporaryDirectory() as work_dir:
rt_mod1: tvm.runtime.Module = ms.tune_relay(
mod=mod,
params=params,
target=target,
config=ms.TuneConfig(
strategy="evolutionary",
num_trials_per_iter=32,
max_trials_per_task=20000,
max_trials_global=20000,
search_strategy_config={
"genetic_num_iters": 10,
},
),
work_dir=work_dir,
database=ms.database.JSONDatabase(
osp.join(work_dir, "workload.json"),
osp.join(work_dir, "records.json"),
),
)
with ms.Profiler() as profiler:
rt_mod1: tvm.runtime.Module = ms.tune_relay(
mod=mod,
params=params,
target=target,
config=ms.TuneConfig(
strategy="evolutionary",
num_trials_per_iter=32,
max_trials_per_task=20000,
max_trials_global=20000,
),
work_dir=work_dir,
)
print(profiler.table())
# Compile without meta-scheduler for correctness check
with tvm.transform.PassContext(opt_level=0):
rt_mod2 = relay.build(mod, target=target, params=params)
Expand Down

0 comments on commit 6ce41be

Please sign in to comment.