diff --git a/python/tvm/ansor/__init__.py b/python/tvm/ansor/__init__.py
index 3e9b76c2f6ad..2d27995e328e 100644
--- a/python/tvm/ansor/__init__.py
+++ b/python/tvm/ansor/__init__.py
@@ -29,6 +29,7 @@ from .compute_dag import ComputeDAG
 from .task import SearchTask, MetaTileRewritePolicy, TuneOption
 from .task import auto_schedule
-from .measure import MeasureInput, LocalBuilder, LocalRunner, RPCRunner
+from .measure import MeasureInput, LocalBuilder, LocalRunner, RPCRunner, RPCRunnerWarpper
 from .cost_model import RandomModel
+from .cost_model.xgb_model import XGBModel
 from .serialization import LogToFile, LogReader, best_measure_pair_in_file
diff --git a/python/tvm/ansor/cost_model/cost_model.py b/python/tvm/ansor/cost_model/cost_model.py
index a0e586d69cec..fd9b67927185 100644
--- a/python/tvm/ansor/cost_model/cost_model.py
+++ b/python/tvm/ansor/cost_model/cost_model.py
@@ -42,3 +42,32 @@ def random_number(n, return_ptr):
     return_ptr = ctypes.cast(return_ptr, ctypes.POINTER(ctypes.c_float))
     array_wrapper = np.ctypeslib.as_array(return_ptr, shape=(n,))
     array_wrapper[:] = np.random.uniform(0, 1, (n,))
+
+@tvm._ffi.register_object("ansor.PythonBasedModel")
+class PythonBasedModel(CostModel):
+    def __init__(self):
+        def update_func(inputs, results):
+            self.update(inputs, results)
+
+        def predict_func(task, states, return_ptr):
+            return_ptr = ctypes.cast(return_ptr, ctypes.POINTER(ctypes.c_float))
+            array_wrapper = np.ctypeslib.as_array(return_ptr, shape=(len(states),))
+            array_wrapper[:] = self.predict(task, states)
+
+        def predict_stage_func(task, states, return_ptr):
+            ret = self.predict_stages(task, states)
+            return_ptr = ctypes.cast(return_ptr, ctypes.POINTER(ctypes.c_float))
+            array_wrapper = np.ctypeslib.as_array(return_ptr, shape=ret.shape)
+            array_wrapper[:] = ret
+
+        self.__init_handle_by_constructor__(_ffi_api.PythonBasedModel, update_func,
+                                            predict_func, predict_stage_func)
+
+    def update(self, inputs, results):
+        raise NotImplementedError
+
+    def predict(self, task, states):
+        raise NotImplementedError
+
+    def predict_stages(self, task, states):
+        raise NotImplementedError
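For readers building their own model on this interface: a subclass only needs to override `update` and `predict` (plus `predict_stages` if the search policy asks for per-stage scores); the constructor above wires those methods into the C++ search loop as packed functions. A minimal sketch, with purely illustrative constant scores:

```python
import numpy as np
from tvm.ansor.cost_model.cost_model import PythonBasedModel

class ConstantModel(PythonBasedModel):
    """Toy model that scores every candidate state equally."""
    def update(self, inputs, results):
        pass  # a real model would retrain on the new measurement pairs here

    def predict(self, task, states):
        # Must return one float per state; larger means predicted faster.
        return np.ones(len(states), dtype=np.float32)
```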
+ +"""Cost model based on xgboost""" +from typing import List +import multiprocessing +import logging +import time +from collections import defaultdict + +import numpy as np +import xgboost as xgb + +from ...autotvm.tuner.xgboost_cost_model import get_rank, recall_curve, max_curve +from .cost_model import PythonBasedModel +from ..feature import get_per_stmt_features_from_measure_pairs, get_per_stmt_features_from_states +from ..serialization import LogReader + +logger = logging.getLogger('ansor') + +class XGBDMatrixContext: + """Context to hold additional attributes of xgb.DMatrix""" + def __init__(self): + self.context_dict = defaultdict(dict) + + def get(self, key, matrix, default=None): + return self.context_dict[key].get(matrix.handle.value, default) + + def put(self, key, matrix, value): + self.context_dict[key][matrix.handle.value] = value + +dmatrix_context = XGBDMatrixContext() + +class XGBModel(PythonBasedModel): + """Train a XGBoost model to predict the runtime cost of a program. + The cost of a program = the sum of the costs of all stages in this program. + i.e. Cost(p) = cost_s0 + cost_s1 + ... + cost_sn, where cost_si is the cost of Stage i + + The xgboost model makes prediction per stage, then we sum them up. + The final predction made by this class is normalized throughtput (from 0 to 1, larger is better) + + To support this stage decomposition, we have to implement a custom loss function for + XGBoost, which is the `pack_sum` in the code below. + """ + def __init__(self, verbose_eval=25, num_warmup_sample=100, seed=None): + self.xgb_params = { + 'max_depth': 10, + 'gamma': 0.001, + 'min_child_weight': 0, + 'eta': 0.2, + # todo(lmzheng): automatically decrease learning rate when the loss is too large + + 'n_gpus': 0, + 'n_threads': multiprocessing.cpu_count() / 2, + 'silent': 0, + 'seed': seed or 43, + 'disable_default_eval_metric': 1 + } + self.bst = None + self.plan_size = 32 + self.num_warmup_sample = num_warmup_sample + self.verbose_eval = verbose_eval + + super().__init__() + + # measurement input/result pairs + self.inputs = [] + self.results = [] + self.inputs_feature_cache = [] + + def update(self, inputs, results): + if len(inputs) <= 0: + return + + self.inputs.extend(inputs) + self.results.extend(results) + + # extract feature + n_cached = len(self.inputs_feature_cache) + features, normalized_throughputs, task_ids = \ + get_per_stmt_features_from_measure_pairs(self.inputs, self.results, + skip_first_n_feature_extraction=n_cached) + if n_cached > 0: + features = list(features) + features[:n_cached] = self.inputs_feature_cache + features = np.array(features) + self.inputs_feature_cache = features + dtrain = pack_sum_xgbmatrix(features, normalized_throughputs, task_ids, normalized_throughputs) + + # train xgb model + self.bst = xgb.train(self.xgb_params, dtrain, + num_boost_round=10000, + obj=pack_sum_square_error, + callbacks=[custom_callback( + stopping_rounds=50, + metric='tr-p-rmse', + fevals=[ + pack_sum_rmse, pack_sum_average_peak_score(self.plan_size), + ], + evals=[(dtrain, 'tr')], + maximize=False, + verbose_eval=self.verbose_eval)]) + + def predict(self, task, states): + features = get_per_stmt_features_from_states(states, task) + if self.bst is not None and len(self.inputs) > self.num_warmup_sample: + dtest, pack_ids = pack_sum_xgbmatrix_for_prediction(features) + raw_preds = self.bst.predict(dtest) + ret = pack_sum_predict_throughput(raw_preds, pack_ids) + else: + ret = np.random.uniform(0, 1, (len(states),)) + + # Predict 0 for invalid states that failed 
+    def predict(self, task, states):
+        features = get_per_stmt_features_from_states(states, task)
+        if self.bst is not None and len(self.inputs) > self.num_warmup_sample:
+            dtest, pack_ids = pack_sum_xgbmatrix_for_prediction(features)
+            raw_preds = self.bst.predict(dtest)
+            ret = pack_sum_predict_throughput(raw_preds, pack_ids)
+        else:
+            ret = np.random.uniform(0, 1, (len(states),))
+
+        # Predict -inf for invalid states that failed to be lowered.
+        for idx, feature in enumerate(features):
+            if feature.min() == feature.max() == 0:
+                ret[idx] = float('-inf')
+
+        return ret
+
+    def predict_stages(self, task, states):
+        # Format: (s0 score, ..., sN score, s0 n_stage, s0 stage 0 score, ...,
+        #          s1 n_stage, s1 stage 0 score, ...)
+
+        features = get_per_stmt_features_from_states(states, task)
+        if self.bst is not None and len(self.inputs) > self.num_warmup_sample:
+            dtest, pack_ids = pack_sum_xgbmatrix_for_prediction(features)
+            raw_preds = self.bst.predict(dtest)
+            breakdown = pack_sum_predict_throughput(raw_preds, pack_ids)
+            stage_scores = [[] for _ in range(len(states))]
+            for pred, pack_id in zip(raw_preds, pack_ids):
+                stage_scores[pack_id].append(pred)
+            for idx, stage_score in enumerate(stage_scores):
+                breakdown = np.append(breakdown, len(stage_score))
+                breakdown = np.concatenate((breakdown, -np.array(stage_score)))
+        else:
+            breakdown = np.concatenate(
+                (np.random.uniform(0, 1, (len(states),)), np.zeros(len(states),)))
+
+        # Predict -inf for invalid states that failed to be lowered.
+        for idx, feature in enumerate(features):
+            if feature.min() == feature.max() == 0:
+                breakdown[idx] = float('-inf')
+
+        return breakdown
+
+    def load_log_file(self, file_name, n_lines=-1):
+        inputs, results = LogReader(file_name).read_lines(n_lines)
+        logger.info("XGBModel: Loaded %s lines of history log from %s", len(inputs), file_name)
+        self.update(inputs, results)
+
+    def save(self, file_name: str):
+        self.bst.save_model(file_name)
+
+    def load(self, file_name: str):
+        if self.bst is None:
+            self.bst = xgb.Booster(self.xgb_params)
+        self.bst.load_model(file_name)
+        self.num_warmup_sample = -1
+
+
+def pack_sum_xgbmatrix_for_prediction(xs):
+    x_flatten = []
+    pack_ids = []
+
+    for ct, x in enumerate(xs):
+        for row in x:
+            x_flatten.append(row)
+            pack_ids.append(ct)
+
+    return xgb.DMatrix(x_flatten), pack_ids
+
+
+def pack_sum_xgbmatrix(xs, ys, gids=None, weights=None):
+    if gids is not None:
+        # sort by group
+        indices = gids.argsort()
+        xs, ys = xs[indices], ys[indices]
+        group_sizes = np.bincount(gids)
+        if weights is not None:
+            weights = weights[indices]
+    else:
+        # assume it has only one group
+        group_sizes = [len(xs)]
+
+    x_flatten = []
+    y_flatten = []
+    weights_flatten = []
+    pack_ids = []
+
+    if weights is not None:
+        for ct, (x, y, w) in enumerate(zip(xs, ys, weights)):
+            for row in x:
+                x_flatten.append(row)
+                y_flatten.append(y)
+                weights_flatten.append(w)
+                pack_ids.append(ct)
+    else:
+        for ct, (x, y) in enumerate(zip(xs, ys)):
+            for row in x:
+                x_flatten.append(row)
+                y_flatten.append(y)
+                pack_ids.append(ct)
+
+    ret = xgb.DMatrix(x_flatten, y_flatten)
+    if weights is not None:
+        ret.set_weight(weights_flatten)
+    dmatrix_context.put('pack_ids', ret, np.array(pack_ids))
+    dmatrix_context.put('group_sizes', ret, group_sizes)
+    return ret
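The reduction that undoes this packing is a single `np.bincount` call: with `weights=`, it sums the per-stage predictions that share a pack id, and indexing the result with `pack_ids` broadcasts each program's sum back onto its rows. A quick standalone demonstration:

```python
import numpy as np

raw_preds = np.array([0.2, 0.3, 0.1, 0.4, 0.5])  # one score per stage row
pack_ids = np.array([0, 0, 1, 1, 1])             # stage -> program mapping

sum_pred = np.bincount(pack_ids, weights=raw_preds)
print(sum_pred)             # [0.5 1. ]   per-program sums
print(sum_pred[pack_ids])   # [0.5 0.5 1. 1. 1.]   broadcast back per row
```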
+LOSS_TYPE = 3
+
+# Type 0
+# The model predicts cost. Use square error of throughput as loss.
+# loss = 1/2 * (1 / sum(x_i) - y) ^ 2
+#
+# Type 1
+# The model predicts cost. Use square error of cost as loss.
+# loss = 1/2 * (sum(x_i) - 1 / y) ^ 2
+#
+# Type 2
+# The model predicts throughput. Use square error of throughput as loss.
+# loss = 1/2 * (1 / sum(1 / x_i) - y) ^ 2
+#
+# Type 3
+# The model predicts throughput. Use square error of throughput as loss.
+# But approximate 1 / (1 / x_1 + 1 / x_2 + ... + 1 / x_n) with -(x_1 + x_2 + ... + x_n).
+# loss = 1/2 * (-sum(x_i) - y) ^ 2
+#
+# Type 4
+# The model predicts throughput. Use square error of throughput as loss.
+# But approximate 1 / (1 / x_1 + 1 / x_2 + ... + 1 / x_n) with -(x_1 + x_2 + ... + x_n),
+# and add a sigmoid to force the prediction to be within the range of (0, 1).
+# loss = 1/2 * (sigmoid(-sum(x_i)) - y) ^ 2
+#
+
+def pack_sum_predict_throughput(raw_preds, pack_ids):
+    if LOSS_TYPE == 0:
+        sum_pred = np.bincount(pack_ids, weights=raw_preds)
+        return 1 / sum_pred
+    elif LOSS_TYPE == 1:
+        sum_pred = np.bincount(pack_ids, weights=raw_preds)
+        return 1 / sum_pred
+    elif LOSS_TYPE == 2:
+        sum_inverse_preds = np.bincount(pack_ids, weights=1 / raw_preds)
+        return 1 / sum_inverse_preds
+    elif LOSS_TYPE == 3:
+        sum_pred = np.bincount(pack_ids, weights=raw_preds)
+        return - sum_pred  # pylint: disable=invalid-unary-operand-type
+    elif LOSS_TYPE == 4:
+        sum_pred = np.bincount(pack_ids, weights=raw_preds)
+        return 1 / (1 + np.exp(sum_pred))
+    else:
+        raise ValueError("Invalid loss type: " + str(LOSS_TYPE))
+
+def pack_sum_square_error(preds, dtrain):
+    pack_ids = dmatrix_context.get("pack_ids", dtrain)
+    weight = dtrain.get_weight()
+
+    if LOSS_TYPE == 0:
+        sum_pred = np.bincount(pack_ids, weights=preds)
+        x = sum_pred[pack_ids]
+        y = dtrain.get_label()
+        gradient = (x * y - 1) / np.power(x, 3)
+        hessian = (3 - 2 * x * y) / np.power(x, 4)
+    elif LOSS_TYPE == 1:
+        sum_pred = np.bincount(pack_ids, weights=preds)
+        x = sum_pred[pack_ids]
+        y = dtrain.get_label()
+        gradient = x - 1 / np.minimum(y, 1e6)
+        hessian = np.ones_like(gradient)
+    elif LOSS_TYPE == 2:
+        sum_inverse_preds = np.bincount(pack_ids, weights=1 / preds)[pack_ids]
+        y = dtrain.get_label()
+        gradient = (1 / sum_inverse_preds - y) / (np.power(preds * sum_inverse_preds, 2))
+        hessian = (2 * preds * y * np.power(sum_inverse_preds, 2)
+                   - 2 * y * sum_inverse_preds - 2 * preds * sum_inverse_preds + 3) \
+                  / (np.power(preds * sum_inverse_preds, 4))
+    elif LOSS_TYPE == 3:
+        sum_pred = np.bincount(pack_ids, weights=preds)
+        x = sum_pred[pack_ids]
+        y = dtrain.get_label()
+        gradient = x + y
+        hessian = np.ones_like(gradient)
+    elif LOSS_TYPE == 4:
+        sum_pred = np.bincount(pack_ids, weights=preds)
+        exp_x = np.exp(sum_pred[pack_ids])
+        exp_2x = np.power(exp_x, 2)
+        y = dtrain.get_label()
+        gradient = exp_x * (exp_x * y + y - 1) / np.power(exp_x + 1, 3)
+        hessian = exp_x * (-exp_2x * y + 2 * exp_x + y - 1) / np.power(exp_x + 1, 4)
+    else:
+        raise ValueError("Invalid loss type: " + str(LOSS_TYPE))
+
+    if len(weight) == 0:
+        return gradient, hessian
+    else:
+        return gradient * weight, hessian * weight
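The gradient for the default `LOSS_TYPE = 3` can be sanity-checked by hand: with loss = 1/2 * (-sum(x_i) - y)^2, differentiating with respect to any x_i in a pack gives sum(x_i) + y, which is exactly the `gradient = x + y` branch above. A standalone finite-difference check with made-up numbers:

```python
import numpy as np

def loss(preds, y, pack_ids):
    s = np.bincount(pack_ids, weights=preds)  # per-pack sums
    return 0.5 * np.sum((-s - y) ** 2)        # LOSS_TYPE 3

preds = np.array([0.2, 0.3, 0.1])
y = np.array([0.7, 0.4])                      # one label per pack
pack_ids = np.array([0, 0, 1])

s = np.bincount(pack_ids, weights=preds)
analytic = s[pack_ids] + y[pack_ids]          # gradient = x + y

eps = 1e-6
numeric = np.array([
    (loss(preds + eps * np.eye(3)[i], y, pack_ids) - loss(preds, y, pack_ids)) / eps
    for i in range(3)])

assert np.allclose(analytic, numeric, atol=1e-4)
```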
+ """evaluate average recall score for xgb""" + + def feval(preds, labels): + group_sizes = dmatrix_context.get('group_sizes', labels, [len(preds)]) + pack_ids = dmatrix_context.get("pack_ids", labels) + + preds = pack_sum_predict_throughput(preds, pack_ids) + labels = (np.bincount(pack_ids, weights=labels.get_label()) + / np.unique(pack_ids, return_counts=True)[1]) + + scores = [] + offset = 0 + for size in group_sizes: + preds_group = preds[offset:offset + size] + labels_group = labels[offset:offset + size] + offset += size + + trials = np.argsort(preds_group)[::-1] + ranks = get_rank(labels_group[trials])[:N] + curve = recall_curve(ranks) + scores.append(np.mean(curve)) + return "a-recall@%d" % N, np.mean(scores) + return feval + + +def custom_callback(stopping_rounds, metric, fevals, evals=(), log_file=None, + maximize=False, verbose_eval=True, skip_every=2): + """Callback function for xgboost to support multiple custom evaluation functions""" + from xgboost.core import EarlyStopException + from xgboost.callback import _fmt_metric + from xgboost.training import aggcv + + state = {} + metric_shortname = metric.split("-")[1] + + def init(env): + """internal function""" + bst = env.model + + state['maximize_score'] = maximize + state['best_iteration'] = 0 + if maximize: + state['best_score'] = float('-inf') + else: + state['best_score'] = float('inf') + + if bst is not None: + if bst.attr('best_score') is not None: + state['best_score'] = float(bst.attr('best_score')) + state['best_iteration'] = int(bst.attr('best_iteration')) + state['best_msg'] = bst.attr('best_msg') + else: + bst.set_attr(best_iteration=str(state['best_iteration'])) + bst.set_attr(best_score=str(state['best_score'])) + else: + assert env.cvfolds is not None + + def callback(env): + """internal function""" + if not state: + init(env) + + bst = env.model + i = env.iteration + cvfolds = env.cvfolds + + res_dict = {} + + if i % skip_every == 1: + return + + ##### evaluation ##### + if cvfolds is not None: + for feval in fevals: + tmp = aggcv([f.eval(i, feval) for f in cvfolds]) + for k, mean, std in tmp: + res_dict[k] = [mean, std] + else: + for feval in fevals: + bst_eval = bst.eval_set(evals, i, feval) + res = [x.split(':') for x in bst_eval.split()] + for kv in res[1:]: + res_dict[kv[0]] = [float(kv[1])] + + eval_res = [] + keys = list(res_dict.keys()) + keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x) + for key in keys: + v = res_dict[key] + eval_res.append([key] + v) + + ##### print eval result ##### + if not isinstance(verbose_eval, bool) and verbose_eval and i % verbose_eval == 0: + infos = ["XGB iter: %3d" % i] + for item in eval_res: + if 'null' in item[0]: + continue + infos.append("%s: %.6f" % (item[0], item[1])) + + logger.debug("\t".join(infos)) + if log_file: + with open(log_file, "a") as fout: + fout.write("\t".join(infos) + '\n') + + ##### choose score and do early stopping ##### + score = None + for item in eval_res: + if item[0] == metric: + score = item[1] + break + assert score is not None + + best_score = state['best_score'] + best_iteration = state['best_iteration'] + maximize_score = state['maximize_score'] + if (maximize_score and score > best_score) or \ + (not maximize_score and score < best_score): + msg = '[%d] %s' % ( + env.iteration, + '\t'.join([_fmt_metric(x) for x in eval_res])) + state['best_msg'] = msg + state['best_score'] = score + state['best_iteration'] = env.iteration + # save the property to attributes, so they will occur in checkpoint. 
+def custom_callback(stopping_rounds, metric, fevals, evals=(), log_file=None,
+                    maximize=False, verbose_eval=True, skip_every=2):
+    """Callback function for xgboost to support multiple custom evaluation functions"""
+    from xgboost.core import EarlyStopException
+    from xgboost.callback import _fmt_metric
+    from xgboost.training import aggcv
+
+    state = {}
+    metric_shortname = metric.split("-")[1]
+
+    def init(env):
+        """internal function"""
+        bst = env.model
+
+        state['maximize_score'] = maximize
+        state['best_iteration'] = 0
+        if maximize:
+            state['best_score'] = float('-inf')
+        else:
+            state['best_score'] = float('inf')
+
+        if bst is not None:
+            if bst.attr('best_score') is not None:
+                state['best_score'] = float(bst.attr('best_score'))
+                state['best_iteration'] = int(bst.attr('best_iteration'))
+                state['best_msg'] = bst.attr('best_msg')
+            else:
+                bst.set_attr(best_iteration=str(state['best_iteration']))
+                bst.set_attr(best_score=str(state['best_score']))
+        else:
+            assert env.cvfolds is not None
+
+    def callback(env):
+        """internal function"""
+        if not state:
+            init(env)
+
+        bst = env.model
+        i = env.iteration
+        cvfolds = env.cvfolds
+
+        res_dict = {}
+
+        if i % skip_every == 1:
+            return
+
+        ##### evaluation #####
+        if cvfolds is not None:
+            for feval in fevals:
+                tmp = aggcv([f.eval(i, feval) for f in cvfolds])
+                for k, mean, std in tmp:
+                    res_dict[k] = [mean, std]
+        else:
+            for feval in fevals:
+                bst_eval = bst.eval_set(evals, i, feval)
+                res = [x.split(':') for x in bst_eval.split()]
+                for kv in res[1:]:
+                    res_dict[kv[0]] = [float(kv[1])]
+
+        eval_res = []
+        keys = list(res_dict.keys())
+        keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x)
+        for key in keys:
+            v = res_dict[key]
+            eval_res.append([key] + v)
+
+        ##### print eval result #####
+        if not isinstance(verbose_eval, bool) and verbose_eval and i % verbose_eval == 0:
+            infos = ["XGB iter: %3d" % i]
+            for item in eval_res:
+                if 'null' in item[0]:
+                    continue
+                infos.append("%s: %.6f" % (item[0], item[1]))
+
+            logger.debug("\t".join(infos))
+            if log_file:
+                with open(log_file, "a") as fout:
+                    fout.write("\t".join(infos) + '\n')
+
+        ##### choose score and do early stopping #####
+        score = None
+        for item in eval_res:
+            if item[0] == metric:
+                score = item[1]
+                break
+        assert score is not None
+
+        best_score = state['best_score']
+        best_iteration = state['best_iteration']
+        maximize_score = state['maximize_score']
+        if (maximize_score and score > best_score) or \
+                (not maximize_score and score < best_score):
+            msg = '[%d] %s' % (
+                env.iteration,
+                '\t'.join([_fmt_metric(x) for x in eval_res]))
+            state['best_msg'] = msg
+            state['best_score'] = score
+            state['best_iteration'] = env.iteration
+            # save the property to attributes, so they will occur in checkpoint.
+            if env.model is not None:
+                env.model.set_attr(best_score=str(state['best_score']),
+                                   best_iteration=str(state['best_iteration']),
+                                   best_msg=state['best_msg'])
+        elif env.iteration - best_iteration >= stopping_rounds:
+            best_msg = state['best_msg']
+            if verbose_eval and env.rank == 0:
+                logger.debug("XGB stopped. Best iteration: %s ", best_msg)
+            raise EarlyStopException(best_iteration)
+
+    return callback
diff --git a/python/tvm/ansor/measure.py b/python/tvm/ansor/measure.py
index e10da09e4b5a..e35a73148f3a 100644
--- a/python/tvm/ansor/measure.py
+++ b/python/tvm/ansor/measure.py
@@ -35,6 +35,8 @@ from tvm.runtime import Object, module, ndarray
 from tvm.driver import build_module
 from tvm.ir import transform
+from tvm.rpc.tracker import Tracker
+from tvm.rpc.server import Server
 from ..contrib import tar, ndk
 from .utils import get_const_tuple, NoDaemonPool, call_func_with_timeout, request_remote, check_remote
 from .compute_dag import LayoutRewriteLevel
@@ -190,6 +192,52 @@ def __init__(self, key, host, port, priority=1,
                      "and make sure you have free devices on the queue status.")
 
 
+class RPCRunnerWarpper:
+    def __init__(self, target=None, priority=1,
+                 n_parallel=1,
+                 timeout=10,
+                 number=3,
+                 repeat=1,
+                 min_repeat_ms=0,
+                 cooldown_interval=0.0):
+        self.target = target
+        self.priority = priority
+        self.n_parallel = n_parallel
+        self.timeout = timeout
+        self.number = number
+        self.repeat = repeat
+        self.min_repeat_ms = min_repeat_ms
+        self.cooldown_interval = cooldown_interval
+
+        self.tracker = None
+        self.server = None
+        self.runner = None
+
+    def __enter__(self):
+        if self.target == "cuda":
+            ctx = tvm.context("cuda", 0)
+            cuda_arch = "sm_" + "".join(ctx.compute_version.split('.'))
+            tvm.autotvm.measure.measure_methods.set_cuda_target_arch(cuda_arch)
+        host = '0.0.0.0'
+        self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
+        device_key = '$local$device$%d' % self.tracker.port
+        self.server = Server(host, port=self.tracker.port, port_end=10000,
+                             key=device_key,
+                             use_popen=True, silent=True,
+                             tracker_addr=(self.tracker.host, self.tracker.port))
+        self.runner = RPCRunner(device_key, host, self.tracker.port, self.priority,
+                                self.n_parallel, self.timeout, self.number, self.repeat,
+                                self.min_repeat_ms, self.cooldown_interval)
+
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        # Always tear down the tracker and server; returning None lets any
+        # in-flight exception propagate on its own.
+        self.tracker.terminate()
+        self.server.terminate()
+
+
 MAX_ERROR_MSG_LEN = 512
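The wrapper exists so callers no longer have to assemble a tracker, server, and runner by hand; used as a context manager it guarantees teardown. A usage sketch mirroring the updated tests below (the log file name is a placeholder):

```python
from tvm import ansor

with ansor.RPCRunnerWarpper() as rpc_runner:
    tune_option = ansor.TuneOption(n_trials=2, runner=rpc_runner.runner,
                                   callbacks=[ansor.LogToFile("matmul.json")])
    # ... pass tune_option to ansor.auto_schedule(task, search_policy, ...)
```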
diff --git a/src/ansor/cost_model/cost_model.cc b/src/ansor/cost_model/cost_model.cc
index 8e0936071774..bbf15a241974 100644
--- a/src/ansor/cost_model/cost_model.cc
+++ b/src/ansor/cost_model/cost_model.cc
@@ -37,7 +37,7 @@ using ::tvm::runtime::NDArray;
 TVM_REGISTER_OBJECT_TYPE(CostModelNode);
 TVM_REGISTER_OBJECT_TYPE(RandomModelNode);
 TVM_REGISTER_OBJECT_TYPE(MeasureModelNode);
-TVM_REGISTER_OBJECT_TYPE(PythonBasedCostModelNode);
+TVM_REGISTER_OBJECT_TYPE(PythonBasedModelNode);
 
 void RandomNumber(TVMArgs args, TVMRetValue* rv) {
   int n = args[0];
@@ -101,30 +101,30 @@ void MeasureModelNode::Predict(const SearchTask& task,
   }
 }
 
-CostModel PythonBasedCostModelNode::make(PackedFunc update_func,
-                                         PackedFunc predict_func,
-                                         PackedFunc predict_stage_func) {
-  auto node = make_object<PythonBasedCostModelNode>();
+CostModel PythonBasedModelNode::make(PackedFunc update_func,
+                                     PackedFunc predict_func,
+                                     PackedFunc predict_stage_func) {
+  auto node = make_object<PythonBasedModelNode>();
   node->update_func = std::move(update_func);
   node->predict_func = std::move(predict_func);
   node->predict_stage_func = std::move(predict_stage_func);
   return CostModel(node);
 }
 
-void PythonBasedCostModelNode::Update(const Array<MeasureInput>& inputs,
-                                      const Array<MeasureResult>& results) {
+void PythonBasedModelNode::Update(const Array<MeasureInput>& inputs,
+                                  const Array<MeasureResult>& results) {
   update_func(inputs, results);
 }
 
-void PythonBasedCostModelNode::Predict(const SearchTask& task,
-                                       const std::vector<State>& states,
-                                       std::vector<float>* scores) {
+void PythonBasedModelNode::Predict(const SearchTask& task,
+                                   const std::vector<State>& states,
+                                   std::vector<float>* scores) {
   scores->resize(states.size());
   predict_func(task, Array<State>(states.begin(), states.end()),
                static_cast<void*>(scores->data()));
 }
 
-void PythonBasedCostModelNode::PredictStages(
+void PythonBasedModelNode::PredictStages(
     const SearchTask& task, const std::vector<State>& states,
     std::vector<float>* state_scores,
     std::vector<std::vector<float>>* stage_scores) {
@@ -188,5 +188,12 @@ TVM_REGISTER_GLOBAL("ansor.RandomModel").set_body_typed([]() {
   return RandomModelNode::make();
 });
 
+TVM_REGISTER_GLOBAL("ansor.PythonBasedModel")
+.set_body_typed([](PackedFunc update_func, PackedFunc predict_func,
+                   PackedFunc predict_stage_func) {
+  return PythonBasedModelNode::make(update_func, predict_func,
+                                    predict_stage_func);
+});
+
 }  // namespace ansor
 }  // namespace tvm
diff --git a/src/ansor/cost_model/cost_model.h b/src/ansor/cost_model/cost_model.h
index 9daf01197bbf..472a3c201068 100644
--- a/src/ansor/cost_model/cost_model.h
+++ b/src/ansor/cost_model/cost_model.h
@@ -92,7 +92,7 @@ class MeasureModelNode : public CostModelNode {
 /*! \brief A wrapper for cost model defined by python code
  *  This class will call python's function */
-class PythonBasedCostModelNode: public CostModelNode {
+class PythonBasedModelNode: public CostModelNode {
  public:
   PackedFunc update_func;
   PackedFunc predict_func;
@@ -108,8 +108,8 @@ class PythonBasedCostModelNode: public CostModelNode {
                      std::vector<float>* state_scores,
                      std::vector<std::vector<float>>* stage_scores) final;
 
-  static constexpr const char *_type_key = "ansor.PythonBasedCostModel";
-  TVM_DECLARE_FINAL_OBJECT_INFO(PythonBasedCostModelNode, CostModelNode);
+  static constexpr const char *_type_key = "ansor.PythonBasedModel";
+  TVM_DECLARE_FINAL_OBJECT_INFO(PythonBasedModelNode, CostModelNode);
 };
 
 }  // namespace ansor
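On the Python side, the flat array returned by `predict_stages` has to be decoded according to the format comment in `xgb_model.py` (all state scores first, then for each state its stage count followed by that many stage scores). A hypothetical helper showing the decoding, under that layout assumption:

```python
import numpy as np

def unpack_stage_breakdown(breakdown, n_states):
    """Hypothetical helper: split XGBModel.predict_stages output into
    per-state scores and per-state lists of stage scores."""
    state_scores = breakdown[:n_states]
    stage_scores = []
    pos = n_states
    for _ in range(n_states):
        n_stages = int(breakdown[pos])
        pos += 1
        stage_scores.append(breakdown[pos:pos + n_stages])
        pos += n_stages
    return state_scores, stage_scores
```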
diff --git a/src/ansor/search_policy/meta_tile_rewrite_policy.cc b/src/ansor/search_policy/meta_tile_rewrite_policy.cc
index 86a7eba1da3a..f086a8879abb 100644
--- a/src/ansor/search_policy/meta_tile_rewrite_policy.cc
+++ b/src/ansor/search_policy/meta_tile_rewrite_policy.cc
@@ -1397,24 +1397,27 @@ void MetaTileRewritePolicyNode::EvolutionarySearch(
     int id = RandomChoose(prefix_sum_probs, &rand_gen_);
 
     if (dis(rand_gen_) < mutation_prob) {
-      const std::vector<double> rule_prefix_sum_probs{0.9, 0.95, 1.0};
+      const std::vector<double> rule_prefix_sum_probs{0.9, 1.0};
       int rule_id = RandomChoose(rule_prefix_sum_probs, &rand_gen_);
-      State tmp_s;
       if (rule_id == 0) {
-        tmp_s = RandomMutateTileSize((*pnow)[id], &split_memo_, &rand_gen_,
+        // Mutate tile size.
+        State tmp_s = RandomMutateTileSize((*pnow)[id], &split_memo_, &rand_gen_,
             cur_task_->hardware_params->max_innermost_split_factor);
+        if (tmp_s.defined()) {
+          pnext->push_back(std::move(tmp_s));
+        } else {
+          mutation_fail_ct++;
+        }
       } else if (rule_id == 1) {
-        tmp_s = RandomMutateMaxUnrollStep((*pnow)[id], &rand_gen_, auto_unroll_configs);
-      } else if (rule_id == 2) {
-        tmp_s = MutataParallel((*pnow)[id], &split_memo_, &rand_gen_, cur_task_);
-      }
-
-      if (tmp_s.defined()) {
-        pnext->push_back(std::move(tmp_s));
-      } else {
-        mutation_fail_ct++;
+        // Mutate auto-unroll max step.
+        State tmp_s = RandomMutateMaxUnrollStep((*pnow)[id], &rand_gen_, auto_unroll_configs);
+        if (tmp_s.defined()) {
+          pnext->push_back(std::move(tmp_s));
+        } else {
+          mutation_fail_ct++;
+        }
       }
     } else {
       pnext->push_back((*pnow)[id]);
diff --git a/tests/python/unittest/test_ansor_search_policy.py b/tests/python/unittest/test_ansor_search_policy.py
index 9a57691aba22..6636787e807f 100644
--- a/tests/python/unittest/test_ansor_search_policy.py
+++ b/tests/python/unittest/test_ansor_search_policy.py
@@ -24,28 +24,25 @@
 
 import tvm
 from tvm import ansor
-from tvm.rpc.tracker import Tracker
-from tvm.rpc.server import Server
 
 from test_ansor_common import matmul_nkkm
 
-def search_common(target="llvm", seed=random.randint(1, 1 << 30), runner='local'):
+def search_common(target="llvm", seed=random.randint(1, 1 << 30), runner='local',
+                  cost_model=ansor.RandomModel(), n_trials=2):
     print("Test %s schedule search with the default search policy" % (target))
 
+    random.seed(seed)
     N = 128
     A, B, C = matmul_nkkm(N, N, N)
     dag = ansor.ComputeDAG([A, B, C])
     tgt = tvm.target.create(target)
     task = ansor.SearchTask(dag, "test", tgt)
 
-    random.seed(seed)
-
     with tempfile.NamedTemporaryFile() as fp:
         log_file = fp.name
 
-        cost_model = ansor.RandomModel()
         search_policy = ansor.MetaTileRewritePolicy(cost_model, seed=seed)
-        tune_option = ansor.TuneOption(n_trials=2, runner=runner,
+        tune_option = ansor.TuneOption(n_trials=n_trials, runner=runner,
                                        callbacks=[ansor.LogToFile(log_file)])
         state = ansor.auto_schedule(task, search_policy,
                                     tune_option=tune_option)
@@ -83,48 +80,30 @@ def test_search_basic():
     search_common(seed=944563397)
 
 
+def test_search_xgb_model_rpc_runner():
+    with ansor.RPCRunnerWarpper() as rpc_runner:
+        search_common(seed=456787236, cost_model=ansor.XGBModel(),
+                      runner=rpc_runner.runner)
+
+
 def test_search_opencl():
     if tvm.context("opencl", 0).exist:
-        host = '0.0.0.0'
-        tracker = Tracker(host, port=9000, port_end=10000, silent=True)
-        device_key = '$local$device$%d' % tracker.port
-        server = Server(host, port=tracker.port, port_end=10000,
-                        key=device_key,
-                        use_popen=True, silent=True,
-                        tracker_addr=(tracker.host, tracker.port))
-        rpc_runner = ansor.RPCRunner(device_key, host, tracker.port)
-
-        search_common("opencl", 380344973, rpc_runner)
-
-        tracker.terminate()
-        server.terminate()
+        with ansor.RPCRunnerWarpper() as rpc_runner:
+            search_common("opencl", 380344973, rpc_runner.runner)
     else:
         print("OpenCL device not found, skip this test.")
 
 
 def test_search_cuda():
-    ctx = tvm.context("cuda", 0)
-    if ctx.exist:
-        cuda_arch = "sm_" + "".join(ctx.compute_version.split('.'))
-        tvm.autotvm.measure.measure_methods.set_cuda_target_arch(cuda_arch)
-        host = '0.0.0.0'
-        tracker = Tracker(host, port=9000, port_end=10000, silent=True)
-        device_key = '$local$device$%d' % tracker.port
-        server = Server(host, port=tracker.port, port_end=10000,
-                        key=device_key,
-                        use_popen=True, silent=True,
-                        tracker_addr=(tracker.host, tracker.port))
-        rpc_runner = ansor.RPCRunner(device_key, host, tracker.port)
-
-        search_common("cuda", 903667810, rpc_runner)
-
-        tracker.terminate()
-        server.terminate()
+    if tvm.context("cuda", 0).exist:
+        with ansor.RPCRunnerWarpper("cuda") as rpc_runner:
+            search_common("cuda", 903667810, rpc_runner.runner)
     else:
         print("CUDA device not found, skip this test.")
 
 
 if __name__ == "__main__":
     test_search_basic()
+    test_search_xgb_model_rpc_runner()
     test_search_opencl()
     test_search_cuda()
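Finally, a sketch of the offline workflow the new `XGBModel` methods enable (file names are placeholders): warm-start a model from previously logged measurements, persist the booster, and reload it later. Since `load()` sets `num_warmup_sample = -1`, a reloaded model skips the random warm-up phase and predicts immediately.

```python
from tvm import ansor

model = ansor.XGBModel()
model.load_log_file("history.json")  # retrain from logged measure pairs
model.save("model.bin")              # persist the xgboost booster

fresh = ansor.XGBModel()
fresh.load("model.bin")              # reloaded model predicts immediately
```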