Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MetaSchedule] Modify Profiler Timers #11735

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions python/tvm/meta_schedule/tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from .measure_callback import MeasureCallback
from .mutator import Mutator
from .postproc import Postproc
from .profiler import Profiler
from .runner import Runner
from .schedule_rule import ScheduleRule
from .search_strategy import EvolutionarySearch, ReplayFunc, ReplayTrace
Expand Down Expand Up @@ -564,7 +565,8 @@ def tune_relay(
target = default_config.target(target)
# pylint: enable=protected-access,
# parse the tuning contexts
extracted_tasks = extract_task_from_relay(mod, target, params)
with Profiler.timeit("TaskExtraction"):
extracted_tasks = extract_task_from_relay(mod, target, params)
database = tune_extracted_tasks(
extracted_tasks,
config,
Expand All @@ -580,9 +582,10 @@ def tune_relay(
mutator_probs=mutator_probs,
num_threads=num_threads,
)
with target, autotvm_silencer(), ApplyHistoryBest(database):
with PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
return relay_build(mod, target=target, params=params)
with Profiler.timeit("ApplyHistoryBest"):
with target, autotvm_silencer(), ApplyHistoryBest(database):
with PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
return relay_build(mod, target=target, params=params)
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/add_to_database.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class AddToDatabaseNode : public MeasureCallbackNode {
if (!task_scheduler->database.defined()) {
return;
}
auto _ = Profiler::TimedScope("AddToDatabase");
auto _ = Profiler::TimedScope("MeasureCallback/AddToDatabase");
TuneContext task = task_scheduler->tasks[task_id];
Database database = task_scheduler->database.value();
Workload workload = database->CommitWorkload(task->mod.value());
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/echo_statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ class EchoStatisticsNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
auto _ = Profiler::TimedScope("MeasureCallback/EchoStatistics");
if (this->task_info.empty()) {
SetupTaskInfo(task_scheduler->tasks);
}
auto _ = Profiler::TimedScope("EchoStatistics");
ICHECK_EQ(measure_candidates.size(), builder_results.size());
ICHECK_EQ(measure_candidates.size(), runner_results.size());
int n = measure_candidates.size();
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/measure_callback.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void PyMeasureCallbackNode::Apply(const TaskScheduler& task_scheduler,
const Array<BuilderResult>& builds, //
const Array<RunnerResult>& results) {
ICHECK(f_apply != nullptr) << "PyMeasureCallback's Apply method not implemented!";
auto _ = Profiler::TimedScope(this->f_as_string());
auto _ = Profiler::TimedScope("MeasureCallback/" + this->f_as_string());
return f_apply(task_scheduler, task_id, measure_candidates, builds, results);
}

Expand Down
3 changes: 2 additions & 1 deletion src/meta_schedule/measure_callback/remove_build_artifact.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ class RemoveBuildArtifactNode : public MeasureCallbackNode {
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
static const PackedFunc* f_rm = runtime::Registry::Get("meta_schedule.remove_build_dir");
auto _ = Profiler::TimedScope("RemoveBuildArtifact");
ICHECK(*f_rm != nullptr) << "The `remove_build_dir` func is not in tvm registry.";
auto _ = Profiler::TimedScope("MeasureCallback/RemoveBuildArtifact");
for (const BuilderResult& build_result : builder_results) {
if (Optional<String> path = build_result->artifact_path) {
(*f_rm)(path.value());
Expand Down
2 changes: 1 addition & 1 deletion src/meta_schedule/measure_callback/update_cost_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class UpdateCostModelNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
auto _ = Profiler::TimedScope("UpdateCostModel");
auto _ = Profiler::TimedScope("MeasureCallback/UpdateCostModel");
TuneContext task = task_scheduler->tasks[task_id];
ICHECK(task_scheduler->cost_model.defined())
<< "Cost model must be defined for the task scheduler!";
Expand Down
4 changes: 3 additions & 1 deletion src/meta_schedule/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ String ProfilerNode::Table() const {
p.Row() << i << table_entry[i].name << table_entry[i].minutes << table_entry[i].percentage;
}
}
p.Separator();
return p.AsStr();
}

Expand All @@ -79,7 +80,8 @@ PackedFunc ProfilerTimedScope(String name) {
tik = std::chrono::high_resolution_clock::now(), //
name = std::move(name)]() {
auto tok = std::chrono::high_resolution_clock::now();
double duration = std::chrono::duration_cast<std::chrono::seconds>(tok - tik).count();
double duration =
std::chrono::duration_cast<std::chrono::nanoseconds>(tok - tik).count() / 1e9;
profiler->stats_sec[name] += duration;
});
}
Expand Down
108 changes: 61 additions & 47 deletions src/meta_schedule/search_strategy/evolutionary_search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -499,41 +499,49 @@ std::vector<Schedule> EvolutionarySearchNode::State::SampleInitPopulation(int nu

std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
std::vector<Schedule> population, int num) {
ICHECK_GT(num, 0);
// The heap to record best schedule, we do not consider schedules that are already measured
IRModuleSet exists = this->measured_workloads_;
IRModuleSet exists;
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc/CopyMeasuredWorkloads");
ICHECK_GT(num, 0);
// The heap to record best schedule, we do not consider schedules that are already measured
exists = this->measured_workloads_;
}
SizedHeap heap(num);
for (int iter = 0;; ++iter) {
// Predict normalized score with the cost model,
std::vector<double> scores = PredictNormalizedScore(population, //
GetRef<TuneContext>(self->context_), //
this->cost_model_, //
this->args_info_);
ICHECK_EQ(scores.size(), population.size());
for (int i = 0, n = population.size(); i < n; ++i) {
Schedule sch = population.at(i);
IRModule mod = sch->mod();
size_t shash = StructuralHash()(mod);
double score = scores.at(i);
if (!exists.Has(mod, shash)) {
exists.Add(mod, shash);
heap.Push(sch, score);

{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
ICHECK_EQ(scores.size(), population.size());
for (int i = 0, n = population.size(); i < n; ++i) {
Schedule sch = population.at(i);
IRModule mod = sch->mod();
size_t shash = StructuralHash()(mod);
double score = scores.at(i);
if (!exists.Has(mod, shash)) {
exists.Add(mod, shash);
heap.Push(sch, score);
}
}
// Discontinue once it reaches end of search
if (iter == self->genetic_num_iters) {
break;
}
// Set threaded samplers, with probability from predicated normalized throughput
for (PerThreadData& data : this->per_thread_data_) {
data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
}
}
// Discontinue once it reaches end of search
if (iter == self->genetic_num_iters) {
break;
}
// Set threaded samplers, with probability from predicated normalized throughput
for (PerThreadData& data : this->per_thread_data_) {
data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
}
ThreadedTraceApply pp(self->context_->postprocs);
ConcurrentBitmask cbmask(self->population_size);
std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
// The worker function
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Mutation");
ThreadedTraceApply pp(self->context_->postprocs);
ConcurrentBitmask cbmask(self->population_size);
std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
// The worker function
auto f_find_candidate = [&cbmask, &population, &next_population, &pp, this](int thread_id,
int trace_id) {
// Prepare samplers
Expand Down Expand Up @@ -571,40 +579,46 @@ std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
};
support::parallel_for_dynamic(0, self->population_size, self->context_->num_threads,
f_find_candidate);

population.swap(next_population);
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Evolve iter #" << iter << " done. Summary:\n"
<< pp.SummarizeFailures();
}
population.swap(next_population);
TVM_PY_LOG(INFO, self->context_->logging_func) << "Evolve iter #" << iter << " done. Summary:\n"
<< pp.SummarizeFailures();
}
// Return the best states from the heap, sorting from higher score to lower ones
std::sort(heap.heap.begin(), heap.heap.end());
std::vector<Schedule> results;
results.reserve(num);
for (const SizedHeap::Item& item : heap.heap) {
results.push_back(item.sch);
}
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
std::sort(heap.heap.begin(), heap.heap.end());
std::vector<Schedule> results;
results.reserve(num);
for (const SizedHeap::Item& item : heap.heap) {
results.push_back(item.sch);
}

constexpr int kNumScoresPerLine = 16;
std::ostringstream os;
int n = heap.heap.size();
for (int st = 0; st < n; st += kNumScoresPerLine) {
os << std::endl;
int ed = std::min(st + kNumScoresPerLine, n);
os << "[" << (st + 1) << " : " << ed << "]:\t";
for (int i = st; i < ed; ++i) {
if (i != st) {
os << " ";
constexpr int kNumScoresPerLine = 16;
std::ostringstream os;
int n = heap.heap.size();
for (int st = 0; st < n; st += kNumScoresPerLine) {
os << std::endl;
int ed = std::min(st + kNumScoresPerLine, n);
os << "[" << (st + 1) << " : " << ed << "]:\t";
for (int i = st; i < ed; ++i) {
if (i != st) {
os << " ";
}
os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Scores of the best " << n << " candidates:" << os.str();
return results;
}
TVM_PY_LOG(INFO, self->context_->logging_func)
<< "Scores of the best " << n << " candidates:" << os.str();
return results;
}

std::vector<Schedule> EvolutionarySearchNode::State::PickWithEpsGreedy(
const std::vector<Schedule>& unmeasured, const std::vector<Schedule>& bests, int num) {
auto _ = Profiler::TimedScope("EvoSearch/PickWithEpsGreedy");
int num_rands = num * self->eps_greedy;
int num_bests = num - num_rands;
std::vector<int> rands =
Expand Down
1 change: 1 addition & 0 deletions src/meta_schedule/task_scheduler/task_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace tvm {
namespace meta_schedule {

void TaskSchedulerNode::InitializeTask(int task_id) {
auto _ = Profiler::TimedScope("InitializeTask");
TuneContext task = this->tasks[task_id];
TVM_PY_LOG(INFO, this->logging_func)
<< "Initializing Task #" << task_id << ": " << task->task_name;
Expand Down
33 changes: 14 additions & 19 deletions tests/python/unittest/test_meta_schedule_tune_relay.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,20 @@ def test_meta_schedule_tune_relay(
mod, params, (input_name, _, _) = get_network(name=model_name, input_shape=input_shape)
target = Target(target)
with tempfile.TemporaryDirectory() as work_dir:
rt_mod1: tvm.runtime.Module = ms.tune_relay(
mod=mod,
params=params,
target=target,
config=ms.TuneConfig(
strategy="evolutionary",
num_trials_per_iter=32,
max_trials_per_task=20000,
max_trials_global=20000,
search_strategy_config={
"genetic_num_iters": 10,
},
),
work_dir=work_dir,
database=ms.database.JSONDatabase(
osp.join(work_dir, "workload.json"),
osp.join(work_dir, "records.json"),
),
)
with ms.Profiler() as profiler:
rt_mod1: tvm.runtime.Module = ms.tune_relay(
mod=mod,
params=params,
target=target,
config=ms.TuneConfig(
strategy="evolutionary",
num_trials_per_iter=32,
max_trials_per_task=20000,
max_trials_global=20000,
),
work_dir=work_dir,
)
print(profiler.table())
# Compile without meta-scheduler for correctness check
with tvm.transform.PassContext(opt_level=0):
rt_mod2 = relay.build(mod, target=target, params=params)
Expand Down