diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py
index 9ee02aa2bbc6..729369fa2354 100644
--- a/python/tvm/meta_schedule/tune.py
+++ b/python/tvm/meta_schedule/tune.py
@@ -38,6 +38,7 @@
 from .measure_callback import MeasureCallback
 from .mutator import Mutator
 from .postproc import Postproc
+from .profiler import Profiler
 from .runner import Runner
 from .schedule_rule import ScheduleRule
 from .search_strategy import EvolutionarySearch, ReplayFunc, ReplayTrace
@@ -564,7 +565,8 @@ def tune_relay(
     target = default_config.target(target)
     # pylint: enable=protected-access,
     # parse the tuning contexts
-    extracted_tasks = extract_task_from_relay(mod, target, params)
+    with Profiler.timeit("TaskExtraction"):
+        extracted_tasks = extract_task_from_relay(mod, target, params)
     database = tune_extracted_tasks(
         extracted_tasks,
         config,
@@ -580,9 +582,10 @@ def tune_relay(
         mutator_probs=mutator_probs,
         num_threads=num_threads,
     )
-    with target, autotvm_silencer(), ApplyHistoryBest(database):
-        with PassContext(
-            opt_level=3,
-            config={"relay.backend.use_meta_schedule": True},
-        ):
-            return relay_build(mod, target=target, params=params)
+    with Profiler.timeit("ApplyHistoryBest"):
+        with target, autotvm_silencer(), ApplyHistoryBest(database):
+            with PassContext(
+                opt_level=3,
+                config={"relay.backend.use_meta_schedule": True},
+            ):
+                return relay_build(mod, target=target, params=params)
diff --git a/src/meta_schedule/measure_callback/add_to_database.cc b/src/meta_schedule/measure_callback/add_to_database.cc
index e86da3720f35..26399276c933 100644
--- a/src/meta_schedule/measure_callback/add_to_database.cc
+++ b/src/meta_schedule/measure_callback/add_to_database.cc
@@ -30,7 +30,7 @@ class AddToDatabaseNode : public MeasureCallbackNode {
     if (!task_scheduler->database.defined()) {
       return;
     }
-    auto _ = Profiler::TimedScope("AddToDatabase");
+    auto _ = Profiler::TimedScope("MeasureCallback/AddToDatabase");
     TuneContext task = task_scheduler->tasks[task_id];
     Database database = task_scheduler->database.value();
     Workload workload = database->CommitWorkload(task->mod.value());
diff --git a/src/meta_schedule/measure_callback/echo_statistics.cc b/src/meta_schedule/measure_callback/echo_statistics.cc
index 5f3dce06f09c..fb1064266566 100644
--- a/src/meta_schedule/measure_callback/echo_statistics.cc
+++ b/src/meta_schedule/measure_callback/echo_statistics.cc
@@ -79,10 +79,10 @@ class EchoStatisticsNode : public MeasureCallbackNode {
              const Array<MeasureCandidate>& measure_candidates,
              const Array<BuilderResult>& builder_results,
              const Array<RunnerResult>& runner_results) final {
+    auto _ = Profiler::TimedScope("MeasureCallback/EchoStatistics");
     if (this->task_info.empty()) {
       SetupTaskInfo(task_scheduler->tasks);
     }
-    auto _ = Profiler::TimedScope("EchoStatistics");
     ICHECK_EQ(measure_candidates.size(), builder_results.size());
     ICHECK_EQ(measure_candidates.size(), runner_results.size());
     int n = measure_candidates.size();
diff --git a/src/meta_schedule/measure_callback/measure_callback.cc b/src/meta_schedule/measure_callback/measure_callback.cc
index e49f5216ec57..ebe63e7b76f1 100644
--- a/src/meta_schedule/measure_callback/measure_callback.cc
+++ b/src/meta_schedule/measure_callback/measure_callback.cc
@@ -27,7 +27,7 @@ void PyMeasureCallbackNode::Apply(const TaskScheduler& task_scheduler,
                                   const Array<MeasureCandidate>& measure_candidates,  //
                                   const Array<BuilderResult>& builds,                 //
                                   const Array<RunnerResult>& results) {
   ICHECK(f_apply != nullptr) << "PyMeasureCallback's Apply method not implemented!";
-  auto _ = Profiler::TimedScope(this->f_as_string());
+  auto _ = Profiler::TimedScope("MeasureCallback/" + this->f_as_string());
   return f_apply(task_scheduler, task_id, measure_candidates, builds, results);
 }
diff --git a/src/meta_schedule/measure_callback/remove_build_artifact.cc b/src/meta_schedule/measure_callback/remove_build_artifact.cc
index 67267dff91c8..0abbebf3b484 100644
--- a/src/meta_schedule/measure_callback/remove_build_artifact.cc
+++ b/src/meta_schedule/measure_callback/remove_build_artifact.cc
@@ -28,7 +28,8 @@ class RemoveBuildArtifactNode : public MeasureCallbackNode {
              const Array<BuilderResult>& builder_results,
              const Array<RunnerResult>& runner_results) final {
     static const PackedFunc* f_rm = runtime::Registry::Get("meta_schedule.remove_build_dir");
-    auto _ = Profiler::TimedScope("RemoveBuildArtifact");
+    ICHECK(f_rm != nullptr) << "The `remove_build_dir` func is not in tvm registry.";
+    auto _ = Profiler::TimedScope("MeasureCallback/RemoveBuildArtifact");
     for (const BuilderResult& build_result : builder_results) {
       if (Optional<String> path = build_result->artifact_path) {
         (*f_rm)(path.value());
diff --git a/src/meta_schedule/measure_callback/update_cost_model.cc b/src/meta_schedule/measure_callback/update_cost_model.cc
index 5b6208581cc7..8851345c43b0 100644
--- a/src/meta_schedule/measure_callback/update_cost_model.cc
+++ b/src/meta_schedule/measure_callback/update_cost_model.cc
@@ -27,7 +27,7 @@ class UpdateCostModelNode : public MeasureCallbackNode {
              const Array<MeasureCandidate>& measure_candidates,
              const Array<BuilderResult>& builder_results,
              const Array<RunnerResult>& runner_results) final {
-    auto _ = Profiler::TimedScope("UpdateCostModel");
+    auto _ = Profiler::TimedScope("MeasureCallback/UpdateCostModel");
     TuneContext task = task_scheduler->tasks[task_id];
     ICHECK(task_scheduler->cost_model.defined())
         << "Cost model must be defined for the task scheduler!";
diff --git a/src/meta_schedule/profiler.cc b/src/meta_schedule/profiler.cc
index d3f72bb70577..2f955ebf09c2 100644
--- a/src/meta_schedule/profiler.cc
+++ b/src/meta_schedule/profiler.cc
@@ -63,6 +63,7 @@ String ProfilerNode::Table() const {
       p.Row() << i << table_entry[i].name << table_entry[i].minutes << table_entry[i].percentage;
     }
   }
+  p.Separator();
   return p.AsStr();
 }
 
@@ -79,7 +80,8 @@ PackedFunc ProfilerTimedScope(String name) {
                     tik = std::chrono::high_resolution_clock::now(),  //
                     name = std::move(name)]() {
     auto tok = std::chrono::high_resolution_clock::now();
-    double duration = std::chrono::duration_cast<std::chrono::nanoseconds>(tok - tik).count();
+    double duration =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(tok - tik).count() / 1e9;
     profiler->stats_sec[name] += duration;
   });
 }
diff --git a/src/meta_schedule/search_strategy/evolutionary_search.cc b/src/meta_schedule/search_strategy/evolutionary_search.cc
index acde7f65a86c..3b672639aaa0 100644
--- a/src/meta_schedule/search_strategy/evolutionary_search.cc
+++ b/src/meta_schedule/search_strategy/evolutionary_search.cc
@@ -499,9 +499,13 @@ std::vector<Schedule> EvolutionarySearchNode::State::SampleInitPopulation(int nu
 
 std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
     std::vector<Schedule> population, int num) {
-  ICHECK_GT(num, 0);
-  // The heap to record best schedule, we do not consider schedules that are already measured
-  IRModuleSet exists = this->measured_workloads_;
+  IRModuleSet exists;
+  {
+    auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc/CopyMeasuredWorkloads");
+    ICHECK_GT(num, 0);
+    // The heap to record best schedule, we do not consider schedules that are already measured
+    exists = this->measured_workloads_;
+  }
   SizedHeap heap(num);
   for (int iter = 0;; ++iter) {
     // Predict normalized score with the cost model,
@@ -509,31 +513,35 @@ std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
         GetRef<TuneContext>(self->context_),  //
         this->cost_model_,                    //
         this->args_info_);
-    ICHECK_EQ(scores.size(), population.size());
-    for (int i = 0, n = population.size(); i < n; ++i) {
-      Schedule sch = population.at(i);
-      IRModule mod = sch->mod();
-      size_t shash = StructuralHash()(mod);
-      double score = scores.at(i);
-      if (!exists.Has(mod, shash)) {
-        exists.Add(mod, shash);
-        heap.Push(sch, score);
-      }
+
+    {
+      auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
+      ICHECK_EQ(scores.size(), population.size());
+      for (int i = 0, n = population.size(); i < n; ++i) {
+        Schedule sch = population.at(i);
+        IRModule mod = sch->mod();
+        size_t shash = StructuralHash()(mod);
+        double score = scores.at(i);
+        if (!exists.Has(mod, shash)) {
+          exists.Add(mod, shash);
+          heap.Push(sch, score);
+        }
+      }
+      // Discontinue once it reaches end of search
+      if (iter == self->genetic_num_iters) {
+        break;
+      }
+      // Set threaded samplers, with probability from predicated normalized throughput
+      for (PerThreadData& data : this->per_thread_data_) {
+        data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
+      }
     }
-    // Discontinue once it reaches end of search
-    if (iter == self->genetic_num_iters) {
-      break;
-    }
-    // Set threaded samplers, with probability from predicated normalized throughput
-    for (PerThreadData& data : this->per_thread_data_) {
-      data.Set(scores, self->genetic_mutate_prob, self->context_->mutator_probs);
-    }
-    ThreadedTraceApply pp(self->context_->postprocs);
-    ConcurrentBitmask cbmask(self->population_size);
-    std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
-    // The worker function
     {
       auto _ = Profiler::TimedScope("EvoSearch/Evolve/Mutation");
+      ThreadedTraceApply pp(self->context_->postprocs);
+      ConcurrentBitmask cbmask(self->population_size);
+      std::vector<Schedule> next_population(self->population_size, Schedule{nullptr});
+      // The worker function
       auto f_find_candidate = [&cbmask, &population, &next_population, &pp, this](int thread_id,
                                                                                   int trace_id) {
         // Prepare samplers
@@ -571,40 +579,46 @@ std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
       };
       support::parallel_for_dynamic(0, self->population_size, self->context_->num_threads,
                                     f_find_candidate);
+
+      population.swap(next_population);
+      TVM_PY_LOG(INFO, self->context_->logging_func)
+          << "Evolve iter #" << iter << " done. Summary:\n"
+          << pp.SummarizeFailures();
     }
-    population.swap(next_population);
-    TVM_PY_LOG(INFO, self->context_->logging_func) << "Evolve iter #" << iter << " done. Summary:\n"
Summary:\n" - << pp.SummarizeFailures(); } // Return the best states from the heap, sorting from higher score to lower ones - std::sort(heap.heap.begin(), heap.heap.end()); - std::vector results; - results.reserve(num); - for (const SizedHeap::Item& item : heap.heap) { - results.push_back(item.sch); - } + { + auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc"); + std::sort(heap.heap.begin(), heap.heap.end()); + std::vector results; + results.reserve(num); + for (const SizedHeap::Item& item : heap.heap) { + results.push_back(item.sch); + } - constexpr int kNumScoresPerLine = 16; - std::ostringstream os; - int n = heap.heap.size(); - for (int st = 0; st < n; st += kNumScoresPerLine) { - os << std::endl; - int ed = std::min(st + kNumScoresPerLine, n); - os << "[" << (st + 1) << " : " << ed << "]:\t"; - for (int i = st; i < ed; ++i) { - if (i != st) { - os << " "; + constexpr int kNumScoresPerLine = 16; + std::ostringstream os; + int n = heap.heap.size(); + for (int st = 0; st < n; st += kNumScoresPerLine) { + os << std::endl; + int ed = std::min(st + kNumScoresPerLine, n); + os << "[" << (st + 1) << " : " << ed << "]:\t"; + for (int i = st; i < ed; ++i) { + if (i != st) { + os << " "; + } + os << std::fixed << std::setprecision(4) << heap.heap.at(i).score; } - os << std::fixed << std::setprecision(4) << heap.heap.at(i).score; } + TVM_PY_LOG(INFO, self->context_->logging_func) + << "Scores of the best " << n << " candidates:" << os.str(); + return results; } - TVM_PY_LOG(INFO, self->context_->logging_func) - << "Scores of the best " << n << " candidates:" << os.str(); - return results; } std::vector EvolutionarySearchNode::State::PickWithEpsGreedy( const std::vector& unmeasured, const std::vector& bests, int num) { + auto _ = Profiler::TimedScope("EvoSearch/PickWithEpsGreedy"); int num_rands = num * self->eps_greedy; int num_bests = num - num_rands; std::vector rands = diff --git a/src/meta_schedule/task_scheduler/task_scheduler.cc b/src/meta_schedule/task_scheduler/task_scheduler.cc index 9c1f451414e3..ea233648f4f5 100644 --- a/src/meta_schedule/task_scheduler/task_scheduler.cc +++ b/src/meta_schedule/task_scheduler/task_scheduler.cc @@ -22,6 +22,7 @@ namespace tvm { namespace meta_schedule { void TaskSchedulerNode::InitializeTask(int task_id) { + auto _ = Profiler::TimedScope("InitializeTask"); TuneContext task = this->tasks[task_id]; TVM_PY_LOG(INFO, this->logging_func) << "Initializing Task #" << task_id << ": " << task->task_name; diff --git a/tests/python/unittest/test_meta_schedule_tune_relay.py b/tests/python/unittest/test_meta_schedule_tune_relay.py index c2baf8d2b921..d86b6fe48b8f 100644 --- a/tests/python/unittest/test_meta_schedule_tune_relay.py +++ b/tests/python/unittest/test_meta_schedule_tune_relay.py @@ -138,25 +138,20 @@ def test_meta_schedule_tune_relay( mod, params, (input_name, _, _) = get_network(name=model_name, input_shape=input_shape) target = Target(target) with tempfile.TemporaryDirectory() as work_dir: - rt_mod1: tvm.runtime.Module = ms.tune_relay( - mod=mod, - params=params, - target=target, - config=ms.TuneConfig( - strategy="evolutionary", - num_trials_per_iter=32, - max_trials_per_task=20000, - max_trials_global=20000, - search_strategy_config={ - "genetic_num_iters": 10, - }, - ), - work_dir=work_dir, - database=ms.database.JSONDatabase( - osp.join(work_dir, "workload.json"), - osp.join(work_dir, "records.json"), - ), - ) + with ms.Profiler() as profiler: + rt_mod1: tvm.runtime.Module = ms.tune_relay( + mod=mod, + params=params, + target=target, + 
+                config=ms.TuneConfig(
+                    strategy="evolutionary",
+                    num_trials_per_iter=32,
+                    max_trials_per_task=20000,
+                    max_trials_global=20000,
+                ),
+                work_dir=work_dir,
+            )
+        print(profiler.table())
     # Compile without meta-scheduler for correctness check
     with tvm.transform.PassContext(opt_level=0):
         rt_mod2 = relay.build(mod, target=target, params=params)
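
Usage sketch (not part of the patch): a minimal example of driving the profiler from user code, mirroring the test change above. ms.Profiler, ms.Profiler.timeit, and profiler.table() are the APIs this patch exercises; the scope name "Preprocessing" is a hypothetical example, and profiler.get() is assumed here to return the raw per-scope totals that ProfilerTimedScope accumulates into stats_sec in seconds.

    import tvm.meta_schedule as ms

    with ms.Profiler() as profiler:
        # A user-defined scope, timed the same way the patch times
        # "TaskExtraction" and "ApplyHistoryBest" inside tune_relay().
        with ms.Profiler.timeit("Preprocessing"):  # hypothetical scope name
            pass  # e.g. prepare mod/params before calling ms.tune_relay(...)

    # Per-scope wall-clock seconds, keyed by scope name:
    print(profiler.get())
    # Human-readable breakdown (minutes and percentage), now terminated by
    # the trailing separator added in ProfilerNode::Table() above:
    print(profiler.table())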