diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef098865de..2711530080 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@
 * Updated benchmarks and moved to asv_benchmarks/benchmarks (#1142)
 * Moved `_fast_kde`, `_fast_kde_2d`, `get_bins` and `_sturges_formula` to `numeric_utils` and `get_coords` to `utils` (#1142)
 * Rank plot: rename `axes` argument to `ax` (#1144)
+* Added a warning specifying that log scale is now the default in compare/loo/waic functions ([#1150](https://github.com/arviz-devs/arviz/pull/1150))

 ### Deprecation

diff --git a/arviz/stats/stats.py b/arviz/stats/stats.py
index 6f6764d2df..095222ed2a 100644
--- a/arviz/stats/stats.py
+++ b/arviz/stats/stats.py
@@ -54,12 +54,12 @@ def compare(

     Parameters
     ----------
-    dataset_dict : dict[str] -> InferenceData
+    dataset_dict: dict[str] -> InferenceData
         A dictionary of model names and InferenceData objects
-    ic : str
+    ic: str
         Information Criterion (PSIS-LOO `loo` or WAIC `waic`) used to compare models. Defaults to
         ``rcParams["stats.information_criterion"]``.
-    method : str
+    method: str
         Method used to estimate the weights for each model. Available options are:

         - 'stacking' : stacking of predictive distributions.
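Not part of the diff: a minimal usage sketch of `compare` under the new default scale. The two dataset names are examples bundled with ArviZ; everything else is illustrative only.

```python
import arviz as az

# Two bundled example models; with the log scale now the default,
# a higher IC value in the output means better estimated predictive accuracy.
model_dict = {
    "centered": az.load_arviz_data("centered_eight"),
    "non_centered": az.load_arviz_data("non_centered_eight"),
}
cmp_df = az.compare(model_dict, ic="loo")  # triggers the warning added in the next hunk
print(cmp_df)
```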
@@ -72,46 +72,49 @@
     b_samples: int
         Number of samples taken by the Bayesian bootstrap estimation.
         Only useful when method = 'BB-pseudo-BMA'.
-    alpha : float
-        The shape parameter in the Dirichlet distribution used for the Bayesian bootstrap. Only
-        useful when method = 'BB-pseudo-BMA'. When alpha=1 (default), the distribution is uniform
-        on the simplex. A smaller alpha will keeps the final weights more away from 0 and 1.
+    alpha: float
+        The shape parameter in the Dirichlet distribution used for the Bayesian bootstrap. Only
+        useful when method = 'BB-pseudo-BMA'. When alpha=1 (default), the distribution is uniform
+        on the simplex. A smaller alpha will keep the final weights further away from 0 and 1.
-    seed : int or np.random.RandomState instance
-        If int or RandomState, use it for seeding Bayesian bootstrap. Only
-        useful when method = 'BB-pseudo-BMA'. Default None the global
-        np.random state is used.
+    seed: int or np.random.RandomState instance
+        If int or RandomState, use it for seeding Bayesian bootstrap. Only
+        useful when method = 'BB-pseudo-BMA'. Default None, in which case the global
+        np.random state is used.
-    scale : str
+    scale: str
         Output scale for IC. Available options are:

         - `log` : (default) log-score (after Vehtari et al. (2017))
         - `negative_log` : -1 * (log-score)
         - `deviance` : -2 * (log-score)

+        A higher log-score (or a lower deviance) indicates a model with better predictive
+        accuracy.
+
     Returns
     -------
     A DataFrame, ordered from best to worst model (measured by information criteria).
     The index reflects the key with which the models are passed to this function. The columns are:
-    rank : The rank-order of the models. 0 is the best.
-    IC : Information Criteria (PSIS-LOO `loo` or WAIC `waic`).
+    rank: The rank-order of the models. 0 is the best.
+    IC: Information Criteria (PSIS-LOO `loo` or WAIC `waic`).
         Higher IC indicates higher out-of-sample predictive fit ("better" model). Default LOO.
         If `scale` is `deviance` or `negative_log` smaller IC indicates
         higher out-of-sample predictive fit ("better" model).
-    pIC : Estimated effective number of parameters.
-    dIC : Relative difference between each IC (PSIS-LOO `loo` or WAIC `waic`)
+    pIC: Estimated effective number of parameters.
+    dIC: Relative difference between each IC (PSIS-LOO `loo` or WAIC `waic`)
         and the lowest IC (PSIS-LOO `loo` or WAIC `waic`).
         The top-ranked model is always 0.
     weight: Relative weight for each model.
-        This can be loosely interpreted as the probability of each model (among the compared model)
+        This can be loosely interpreted as the probability of each model (among the compared models)
         given the data. By default the uncertainty in the weights estimation is considered using
         Bayesian bootstrap.
-    SE : Standard error of the IC estimate.
+    SE: Standard error of the IC estimate.
         If method = BB-pseudo-BMA these values are estimated using Bayesian bootstrap.
-    dSE : Standard error of the difference in IC between each model and the top-ranked model.
+    dSE: Standard error of the difference in IC between each model and the top-ranked model.
         It's always 0 for the top-ranked model.
-    warning : A value of 1 indicates that the computation of the IC may not be reliable.
-        This could be indication of WAIC/LOO starting to fail see
-        http://arxiv.org/abs/1507.04544 for details.
+    warning: A value of 1 indicates that the computation of the IC may not be reliable.
+        This could be an indication of WAIC/LOO starting to fail; see
+        http://arxiv.org/abs/1507.04544 for details.
-    scale : Scale used for the IC.
+    scale: Scale used for the IC.

     Examples
     --------
@@ -138,6 +141,12 @@ def compare(
     if scale == "log":
         scale_value = 1
         ascending = False
+        warnings.warn(
+            "\nThe scale is now log by default. Use 'scale' argument or "
+            "'stats.ic_scale' rcParam if you rely on a specific value.\nA higher "
+            "log-score (or a lower deviance) indicates a model with better predictive "
+            "accuracy."
+        )
     else:
         if scale == "negative_log":
             scale_value = -1
@@ -314,7 +323,7 @@ def hpd(
     var_names=None,
     coords=None,
     max_modes=10,
-    **kwargs
+    **kwargs,
 ):
     """
     Calculate highest posterior density (HPD) of array for given credible_interval.
@@ -323,31 +332,31 @@

     Parameters
     ----------
-    ary : obj
+    ary: obj
         object containing posterior samples.
         Any object that can be converted to an az.InferenceData object.
         Refer to documentation of az.convert_to_dataset for details.
-    credible_interval : float, optional
+    credible_interval: float, optional
         Credible interval to compute. Defaults to 0.94.
-    circular : bool, optional
+    circular: bool, optional
         Whether to compute the hpd taking into account `x` is a circular variable
         (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e non-circular variables).
         Only works if multimodal is False.
-    multimodal : bool
+    multimodal: bool
         If true it may compute more than one hpd interval if the distribution is multimodal and the
         modes are well separated.
-    skipna : bool
+    skipna: bool
         If true ignores nan values when computing the hpd interval. Defaults to false.
-    group : str, optional
+    group: str, optional
         Specifies which InferenceData group should be used to calculate hpd.
         Defaults to 'posterior'
-    var_names : list, optional
+    var_names: list, optional
         Names of variables to include in the hpd report
     coords: mapping, optional
-        Specifies the subset over to calculate hpd.
+        Specifies the subset over which to calculate hpd.
     max_modes: int, optional
-        Specifies the maximume number of modes for multimodal case.
+        Specifies the maximum number of modes for the multimodal case.
-    kwargs : dict, optional
+    kwargs: dict, optional
         Additional keywords passed to `wrap_xarray_ufunc`.
         See the docstring of :obj:`wrap_xarray_ufunc method `.
@@ -404,7 +413,7 @@ def hpd(
     func_kwargs = {
         "credible_interval": credible_interval,
         "skipna": skipna,
-        "out_shape": (max_modes, 2,) if multimodal else (2,),
+        "out_shape": (max_modes, 2) if multimodal else (2,),
     }
     kwargs.setdefault("output_core_dims", [["hpd", "mode"] if multimodal else ["hpd"]])
     if not multimodal:
@@ -495,7 +504,7 @@ def _hpd_multimodal(ary, credible_interval, skipna, max_modes):
     intervals_splitted = np.split(intervals, np.where(np.diff(intervals) >= dx * 1.1)[0] + 1)

-    hpd_intervals = np.full((max_modes, 2,), np.nan,)
+    hpd_intervals = np.full((max_modes, 2), np.nan)
     for i, interval in enumerate(intervals_splitted):
         if i == max_modes:
             warnings.warn(
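Not part of the diff: a quick sketch of the `(max_modes, 2)` output shape cleaned up in the hunks above, on a made-up bimodal sample.

```python
import numpy as np
import arviz as az

# Two well-separated modes exercise the multimodal code path and the
# (max_modes, 2) out_shape fixed above.
bimodal = np.concatenate(
    [np.random.normal(-4.0, 0.5, 5000), np.random.normal(4.0, 0.5, 5000)]
)
intervals = az.hpd(bimodal, credible_interval=0.94, multimodal=True)
print(intervals)  # rows of (lower, upper) HPD bounds, one per detected mode
```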
@@ -519,15 +528,15 @@ def loo(data, pointwise=False, reff=None, scale=None):

     Parameters
     ----------
-    data : obj
+    data: obj
         Any object that can be converted to an az.InferenceData object.
         Refer to documentation of az.convert_to_inference_data for details
-    pointwise : bool, optional
+    pointwise: bool, optional
         if True the pointwise predictive accuracy will be returned. Defaults to False
-    reff : float, optional
+    reff: float, optional
         Relative MCMC efficiency, `ess / n` i.e. number of effective samples divided by
         the number of actual samples. Computed from trace by default.
-    scale : str
+    scale: str
         Output scale for loo. Available options are:

         - `log` : (default) log-score (after Vehtari et al. (2017))
@@ -540,15 +549,15 @@
     Returns
     -------
     pandas.Series with the following rows:
-    loo : approximated Leave-one-out cross-validation
-    loo_se : standard error of loo
-    p_loo : effective number of parameters
-    shape_warn : bool
+    loo: approximated Leave-one-out cross-validation
+    loo_se: standard error of loo
+    p_loo: effective number of parameters
+    shape_warn: bool
         True if the estimated shape parameter of Pareto distribution
         is greater than 0.7 for one or more samples
-    loo_i : array of pointwise predictive accuracy, only if pointwise True
-    pareto_k : array of Pareto shape values, only if pointwise True
-    loo_scale : scale of the loo results
+    loo_i: array of pointwise predictive accuracy, only if pointwise True
+    pareto_k: array of Pareto shape values, only if pointwise True
+    loo_scale: scale of the loo results

     The returned object has a custom print method that overrides pd.Series method.
     It is specific to expected log pointwise predictive density (elpd) information criteria.
@@ -681,16 +690,16 @@ def psislw(log_weights, reff=1.0):

     Parameters
     ----------
-    log_weights : array
+    log_weights: array
         Array of size (n_observations, n_samples)
-    reff : float
+    reff: float
         relative MCMC efficiency, `ess / n`

     Returns
     -------
-    lw_out : array
+    lw_out: array
         Smoothed log weights
-    kss : array
+    kss: array
         Pareto tail indices

     References
@@ -743,17 +752,17 @@ def _psislw(log_weights, cutoff_ind, cutoffmin, k_min=1.0 / 3):

     Parameters
     ----------
-    log_weights : array
+    log_weights: array
         Array of length n_observations
-    cutoff_ind : int
-    cutoffmin : float
-    k_min : float
+    cutoff_ind: int
+    cutoffmin: float
+    k_min: float

     Returns
     -------
-    lw_out : array
+    lw_out: array
         Smoothed log weights
-    kss : float
+    kss: float
         Pareto tail index
     """
     x = np.asarray(log_weights)
@@ -805,14 +814,14 @@ def _gpdfit(ary):

     Parameters
     ----------
-    ary : array
+    ary: array
         sorted 1D data array

     Returns
     -------
-    k : float
+    k: float
         estimated shape parameter
-    sigma : float
+    sigma: float
         estimated scale parameter
     """
     prior_bs = 3
@@ -879,9 +888,9 @@ def r2_score(y_true, y_pred):

     Parameters
     ----------
-    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
+    y_true: array-like of shape = (n_samples) or (n_samples, n_outputs)
         Ground truth (correct) target values.
-    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
+    y_pred: array-like of shape = (n_samples) or (n_samples, n_outputs)
         Estimated target values.

     Returns
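Not part of the diff: `psislw` is the public entry point whose docstring is reformatted above; a sketch with synthetic log-ratios, assuming only the documented `(n_observations, n_samples)` layout.

```python
import numpy as np
import arviz as az

# Fake importance log-ratios for 8 observations x 4000 draws; real usage
# feeds negated pointwise log-likelihood values in the same layout.
log_weights = np.random.normal(size=(8, 4000))
smoothed_lw, pareto_k = az.psislw(log_weights, reff=1.0)
print(smoothed_lw.shape, pareto_k.shape)  # input shape preserved; one k per observation
```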
@@ -935,22 +944,22 @@ def summary(

     Parameters
     ----------
-    data : obj
+    data: obj
         Any object that can be converted to an az.InferenceData object
         Refer to documentation of az.convert_to_dataset for details
-    var_names : list
+    var_names: list
         Names of variables to include in summary
-    fmt : {'wide', 'long', 'xarray'}
+    fmt: {'wide', 'long', 'xarray'}
         Return format is either pandas.DataFrame {'wide', 'long'} or xarray.Dataset {'xarray'}.
-    kind : {'all', 'stats', 'diagnostics'}
+    kind: {'all', 'stats', 'diagnostics'}
         Whether to include the `stats`: `mean`, `sd`, `hpd_3%`, `hpd_97%`,
         or the `diagnostics`: `mcse_mean`, `mcse_sd`, `ess_bulk`, `ess_tail`, and `r_hat`.
         Default to include `all` of them.
-    round_to : int
+    round_to: int
         Number of decimals used to round results. Defaults to 2.
         Use "none" to return raw numbers.
-    include_circ : bool
+    include_circ: bool
         Whether to include circular statistics
-    stat_funcs : dict
+    stat_funcs: dict
         A list of functions or a dict of functions with function names as keys used to calculate
         statistics. By default, the mean, standard deviation, simulation standard error, and
         highest posterior density intervals are included.
@@ -958,18 +967,18 @@
-        The functions will be given one argument, the samples for a variable as an nD array,
+        The functions will be given one argument, the samples for a variable as an nD array.
         The functions should be in the style of a ufunc and return a single number. For example,
         `np.mean`, or `scipy.stats.var` would both work.
-    extend : boolean
+    extend: boolean
         If True, use the statistics returned by ``stat_funcs`` in addition to, rather than in
         place of, the default statistics. This is only meaningful when ``stat_funcs`` is not None.
-    credible_interval : float, optional
+    credible_interval: float, optional
         Credible interval to plot. Defaults to 0.94. This is only meaningful when ``stat_funcs``
         is None.
-    order : {"C", "F"}
+    order: {"C", "F"}
         If fmt is "wide", use either C or F unpacking order. Defaults to C.
-    index_origin : int
-        If fmt is "wide, select n-based indexing for multivariate parameters.
-        Defaults to rcParam data.index.origin, which is 0.
+    index_origin: int
+        If fmt is "wide", select n-based indexing for multivariate parameters.
+        Defaults to rcParam data.index.origin, which is 0.
-    skipna : bool
+    skipna: bool
         If true ignores nan values when computing the summary statistics, it does not affect the
         behaviour of the functions passed to ``stat_funcs``. Defaults to false.
     coords: Dict[str, List[Any]], optional
@@ -1239,13 +1248,13 @@ def waic(data, pointwise=False, scale=None):

     Parameters
     ----------
-    data : obj
+    data: obj
         Any object that can be converted to an az.InferenceData object
         Refer to documentation of az.convert_to_inference_data for details
-    pointwise : bool
+    pointwise: bool
         if True the pointwise predictive accuracy will be returned. Default False
-    scale : str
+    scale: str
         Output scale for WAIC. Available options are:

         - `log` : (default) log-score
@@ -1258,14 +1267,14 @@
     Returns
     -------
     Series with the following rows:
-    waic : widely available information criterion
-    waic_se : standard error of waic
-    p_waic : effective number parameters
-    var_warn : bool
+    waic: widely applicable information criterion
+    waic_se: standard error of waic
+    p_waic: effective number of parameters
+    var_warn: bool
         True if posterior variance of the log predictive
         densities exceeds 0.4
-    waic_i : and array of the pointwise predictive accuracy, only if pointwise True
-    waic_scale : scale of the waic results
+    waic_i: an array of the pointwise predictive accuracy, only if pointwise True
+    waic_scale: scale of the waic results

     The returned object has a custom print method that overrides pd.Series method.
     It is specific to expected log pointwise predictive density (elpd) information criteria.
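Not part of the diff: a sketch of `waic` on a bundled example dataset, passing `scale` explicitly now that log is the default. The printed report is the custom elpd summary extended in stats_utils.py below.

```python
import arviz as az

idata = az.load_arviz_data("centered_eight")
# Passing scale explicitly documents the intent and pins the behavior
# regardless of the 'stats.ic_scale' rcParam.
waic_res = az.waic(idata, pointwise=True, scale="log")
print(waic_res)
```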
@@ -1378,21 +1387,21 @@ def loo_pit(idata=None, *, y=None, y_hat=None, log_weights=None):

     Parameters
     ----------
-    idata : InferenceData
+    idata: InferenceData
         InferenceData object.
-    y : array, DataArray or str
+    y: array, DataArray or str
         Observed data. If str, idata must be present and contain the observed data group
-    y_hat : array, DataArray or str
+    y_hat: array, DataArray or str
         Posterior predictive samples for ``y``. It must have the same shape as y plus an
         extra dimension at the end of size n_samples (chains and draws stacked). If str or
         None, idata must contain the posterior predictive group. If None, y_hat is taken
         equal to y, thus, y must be str too.
-    log_weights : array or DataArray
+    log_weights: array or DataArray
         Smoothed log_weights. It must have the same shape as ``y_hat``

     Returns
     -------
-    loo_pit : array or DataArray
+    loo_pit: array or DataArray
         Value of the LOO-PIT at each observed data point.

     Examples
     --------
@@ -1533,50 +1542,50 @@

     Parameters
     ----------
-    idata : InferenceData
+    idata: InferenceData
         InferenceData object on which to apply the test function. This function will add
         new variables to the InferenceData object to store the result without modifying the
         existing ones.
-    func : callable
+    func: callable
         Callable that calculates the test function. It must have the following call signature
         ``func(y, theta, *args, **kwargs)`` (where ``y`` is the observed data or posterior
         predictive and ``theta`` the model parameters) even if not all the arguments are used.
-    group : str, optional
+    group: str, optional
         Group on which to apply the test function. Can be observed_data,
         posterior_predictive or both.
-    var_names : dict group -> var_names, optional
+    var_names: dict group -> var_names, optional
         Mapping from group name to the variables to be passed to func. It can be a dict of
         strings or lists of strings. There is also the option of using ``both`` as key, in
         which case, the same variables are used in observed data and posterior predictive
         groups
-    pointwise : bool, optional
+    pointwise: bool, optional
         If True, apply the test function to each observation and sample, otherwise, apply
         test function to each sample.
-    out_data_shape, out_pp_shape : tuple, optional
+    out_data_shape, out_pp_shape: tuple, optional
         Output shape of the test function applied to the observed/posterior predictive data.
         If None, the default depends on the value of pointwise.
-    out_name_data, out_name_pp : str, optional
-        Name of the variables to add to the observed_data and posterior_predictive datasets
-        respectively. ``out_name_pp`` can be ``None``, in which case will be taken equal to
-        ``out_name_data``.
+    out_name_data, out_name_pp: str, optional
+        Name of the variables to add to the observed_data and posterior_predictive datasets
+        respectively. ``out_name_pp`` can be ``None``, in which case it will be taken equal to
+        ``out_name_data``.
-    func_args : sequence, optional
+    func_args: sequence, optional
         Passed as is to ``func``
-    func_kwargs : mapping, optional
+    func_kwargs: mapping, optional
         Passed as is to ``func``
-    wrap_data_kwargs, wrap_pp_kwargs : mapping, optional
+    wrap_data_kwargs, wrap_pp_kwargs: mapping, optional
         kwargs passed to ``az.stats.wrap_xarray_ufunc``. By default, some suitable
         input_core_dims are used.
-    inplace : bool, optional
-        If True, add the variables inplace, othewise, return a copy of idata with the
-        variables added.
+    inplace: bool, optional
+        If True, add the variables inplace, otherwise, return a copy of idata with the
+        variables added.
-    overwrite : bool, optional
+    overwrite: bool, optional
         Overwrite data in case ``out_name_data`` or ``out_name_pp`` are already variables in
         dataset. If ``None`` it will be the opposite of inplace.

     Returns
     -------
-    idata : InferenceData
+    idata: InferenceData
         Output InferenceData object. If ``inplace=True``, it is the same input object modified
         inplace.
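Not part of the diff: a sketch of `loo_pit` resolving `y` by name from a single InferenceData, mirroring the Examples section referenced above. `"obs"` is the observed variable in the bundled dataset; other inputs are derived from `idata` when omitted.

```python
import arviz as az

idata = az.load_arviz_data("centered_eight")
# y_hat and the smoothed log weights are computed from idata automatically
# when not given explicitly.
pit_vals = az.loo_pit(idata=idata, y="obs")
print(pit_vals.min(), pit_vals.max())  # LOO-PIT values lie in [0, 1]
```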
diff --git a/arviz/stats/stats_utils.py b/arviz/stats/stats_utils.py
index e26af7b3cb..7239ce8c65 100644
--- a/arviz/stats/stats_utils.py
+++ b/arviz/stats/stats_utils.py
@@ -439,6 +439,11 @@ def get_log_likelihood(idata, var_name=None):
 (0.7, 1]   (bad)      {{4:{0}d}} {{8:6.1f}}%
 (1, Inf)   (very bad) {{5:{0}d}} {{9:6.1f}}%
 """
+SCALE_WARNING_FORMAT = """
+The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if
+you rely on a specific value.
+A higher log-score (or a lower deviance) indicates a model with better predictive
+accuracy."""

 SCALE_DICT = {"deviance": "deviance", "log": "elpd", "negative_log": "-elpd"}

@@ -475,6 +480,7 @@ def __str__(self):
                 "Count", "Pct.", *[*counts, *(counts / np.sum(counts) * 100)]
             )
             base = "\n".join([base, extended])
+        base = "\n".join([base, SCALE_WARNING_FORMAT])
         return base

     def __repr__(self):
diff --git a/arviz/tests/base_tests/test_stats.py b/arviz/tests/base_tests/test_stats.py
index 6ee7811847..9c2e2dbc65 100644
--- a/arviz/tests/base_tests/test_stats.py
+++ b/arviz/tests/base_tests/test_stats.py
@@ -48,13 +48,13 @@ def test_hpd():
 def test_hpd_2darray():
     normal_sample = np.random.randn(12000, 5)
     result = hpd(normal_sample)
-    assert result.shape == (5, 2,)
+    assert result.shape == (5, 2)


 def test_hpd_multidimension():
     normal_sample = np.random.randn(12000, 10, 3)
     result = hpd(normal_sample)
-    assert result.shape == (3, 2,)
+    assert result.shape == (3, 2)


 def test_hpd_idata(centered_eight):
@@ -429,7 +429,6 @@ def test_loo_print(centered_eight, scale):
     assert loo_data is not None
     assert loo_pointwise is not None
     assert len(loo_data) < len(loo_pointwise)
-    assert loo_data == loo_pointwise[: len(loo_data)]


 def test_psislw(centered_eight):
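Not part of the diff: a hypothetical regression test pinning down the new warning. The fixture names follow the existing test module and are assumed to be available from the shared helpers.

```python
import pytest
import arviz as az

def test_compare_warns_on_default_log_scale(centered_eight, non_centered_eight):
    # compare() now warns whenever the log scale is in effect, which is the new default.
    with pytest.warns(UserWarning, match="scale is now log by default"):
        az.compare({"centered": centered_eight, "non_centered": non_centered_eight})
```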