diff --git a/.github/workflows/probot-auto-cc.yml b/.github/workflows/probot-auto-cc.yml
index 0595c4eee65f7f..a6d75ad175eec3 100644
--- a/.github/workflows/probot-auto-cc.yml
+++ b/.github/workflows/probot-auto-cc.yml
@@ -2,16 +2,14 @@ name: Probot
 on:
   issues:
-    types:
-      - labeled
+    types: [labeled]
   pull_request:
-    types:
-      - labeled
+    types: [labeled, ready_for_review]
 
 jobs:
   auto-cc:
-    if: ${{ github.repository_owner == 'PyTorchLightning' }}
     runs-on: ubuntu-latest
+    if: github.event_name == 'issues' || github.event.pull_request.draft == false
     steps:
       - uses: carmocca/probot@v1
         env:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52a5e15b217565..9c8b4714fa4635 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,10 +31,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - The `monitor` argument in the `EarlyStopping` callback is no longer optional ([#10328](https://github.com/PyTorchLightning/pytorch-lightning/pull/10328))
 
 
-- Moved `precision_plugin` into `Training_type_plugin` and updated reference ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
+- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/PyTorchLightning/pytorch-lightning/issues/10438))
 
 
--
+- Raise `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/PyTorchLightning/pytorch-lightning/issues/10520))
+
+
+- Moved `precision_plugin` into `Training_type_plugin` and updated reference ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
 
 -
@@ -127,9 +130,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482))
 
+- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450))
+
+
 - Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525))
 
+- Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favour of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/PyTorchLightning/pytorch-lightning/pull/10481))
+
+
 - Removed `precision_plugin` from `Accelerator` in favor of `precision_plugin` in `training_type_plugin` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
 
@@ -141,15 +150,26 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374)) +- Fixed scripting causing false positive deprecation warnings ([#10470](https://github.com/PyTorchLightning/pytorch-lightning/pull/10470), [#10555](https://github.com/PyTorchLightning/pytorch-lightning/pull/10555)) + + +- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/metrics/pull/10493)) + + - Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure([#10463](https://github.com/PyTorchLightning/pytorch-lightning/issues/10463)) - Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/PyTorchLightning/pytorch-lightning/issues/10461)) -- +- Fixed sampler replacement logic with `overfit_batches` to only replace the sample when `SequentialSampler` is not used ([#10486](https://github.com/PyTorchLightning/pytorch-lightning/issues/10486)) +- Fixed propagation of device and dtype information to submodules of LightningLite when they inherit from `DeviceDtypeModuleMixin` ([#10559](https://github.com/PyTorchLightning/pytorch-lightning/issues/10559)) + + +- + ## [1.5.1] - 2021-11-09 ### Fixed diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index 26b33f3ed95e00..2363e862fbcf1c 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -4,7 +4,7 @@ {% block footer %} {{ super() }} {% endblock %} diff --git a/pl_examples/loop_examples/kfold.py b/pl_examples/loop_examples/kfold.py index bd14d42eb796fb..ed4db6faa50118 100644 --- a/pl_examples/loop_examples/kfold.py +++ b/pl_examples/loop_examples/kfold.py @@ -205,7 +205,7 @@ def on_run_end(self) -> None: voting_model = EnsembleVotingModel(type(self.trainer.lightning_module), checkpoint_paths) voting_model.trainer = self.trainer # This requires to connect the new model and move it the right device. - self.trainer.accelerator.connect(voting_model) + self.trainer.training_type_plugin.connect(voting_model) self.trainer.training_type_plugin.model_to_device() self.trainer.test_loop.run() diff --git a/pl_examples/loop_examples/yielding_training_step.py b/pl_examples/loop_examples/yielding_training_step.py index 4d870f002e247c..3e3082e3f9fac6 100644 --- a/pl_examples/loop_examples/yielding_training_step.py +++ b/pl_examples/loop_examples/yielding_training_step.py @@ -86,7 +86,7 @@ def _training_step(self, generator): # Here, instead of calling `lightning_module.training_step()` # we call next() on the generator! 
training_step_output = next(generator) - self.trainer.accelerator.post_training_step() + self.trainer.training_type_plugin.post_training_step() training_step_output = self.trainer.call_hook("training_step_end", training_step_output) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index b6f064d7d9802d..dc3ce5f0f4063b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -115,6 +115,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._param_requires_grad_state = {} self._metric_attributes: Optional[Dict[int, str]] = None self._should_prevent_trainer_and_dataloaders_deepcopy: bool = False + # TODO: remove after the 1.6 release + self._running_torchscript = False self._register_sharded_tensor_state_dict_hooks_if_available() @@ -1893,6 +1895,8 @@ def to_torchscript( """ mode = self.training + self._running_torchscript = True + if method == "script": torchscript_module = torch.jit.script(self.eval(), **kwargs) elif method == "trace": @@ -1918,6 +1922,8 @@ def to_torchscript( with fs.open(file_path, "wb") as f: torch.jit.save(torchscript_module, f) + self._running_torchscript = False + return torchscript_module @property @@ -1927,11 +1933,12 @@ def model_size(self) -> float: Note: This property will not return correct value for Deepspeed (stage 3) and fully-sharded training. """ - rank_zero_deprecation( - "The `LightningModule.model_size` property was deprecated in v1.5 and will be removed in v1.7." - " Please use the `pytorch_lightning.utilities.memory.get_model_size_mb`.", - stacklevel=5, - ) + if not self._running_torchscript: # remove with the deprecation removal + rank_zero_deprecation( + "The `LightningModule.model_size` property was deprecated in v1.5 and will be removed in v1.7." + " Please use the `pytorch_lightning.utilities.memory.get_model_size_mb`.", + stacklevel=5, + ) return get_model_size_mb(self) def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: diff --git a/pytorch_lightning/core/mixins/device_dtype_mixin.py b/pytorch_lightning/core/mixins/device_dtype_mixin.py index e02790edddd1e5..e8b122989cd9c5 100644 --- a/pytorch_lightning/core/mixins/device_dtype_mixin.py +++ b/pytorch_lightning/core/mixins/device_dtype_mixin.py @@ -17,6 +17,8 @@ import torch from torch.nn import Module +import pytorch_lightning as pl + class DeviceDtypeModuleMixin(Module): __jit_unused_properties__ = ["device", "dtype"] @@ -177,7 +179,9 @@ def __update_properties( self, device: Optional[torch.device] = None, dtype: Optional[Union[str, torch.dtype]] = None ) -> None: def apply_fn(module: Union["DeviceDtypeModuleMixin", Module]) -> None: - if not isinstance(module, DeviceDtypeModuleMixin): + # TODO: Find why `isinstance(module, DeviceDtypeModuleMixin)` doesn't + # work when using `init_meta_context`. 
+ if not isinstance(module, (DeviceDtypeModuleMixin, pl.LightningModule)): return if device is not None: module._device = device diff --git a/pytorch_lightning/lite/wrappers.py b/pytorch_lightning/lite/wrappers.py index 615f4610552045..ff95e89d1d2cf6 100644 --- a/pytorch_lightning/lite/wrappers.py +++ b/pytorch_lightning/lite/wrappers.py @@ -24,6 +24,7 @@ from torch.utils.data import DataLoader from pytorch_lightning.accelerators import Accelerator +from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin from pytorch_lightning.plugins import PrecisionPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device @@ -64,7 +65,7 @@ def step(self, closure: Optional[Callable] = None) -> None: ) -class _LiteModule(nn.Module): +class _LiteModule(DeviceDtypeModuleMixin): def __init__(self, module: nn.Module, precision_plugin: PrecisionPlugin) -> None: """The LiteModule is a thin wrapper around the :class:`torch.nn.Module` and handles precision / autocast automatically for the forward pass. diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py index f26fc75ac58dbb..1ceadb8658a3d4 100644 --- a/pytorch_lightning/loggers/tensorboard.py +++ b/pytorch_lightning/loggers/tensorboard.py @@ -240,7 +240,9 @@ def log_graph(self, model: "pl.LightningModule", input_array=None): if input_array is not None: input_array = model._apply_batch_transfer_handler(input_array) + model._running_torchscript = True self.experiment.add_graph(model, input_array) + model._running_torchscript = False else: rank_zero_warn( "Could not log computational graph since the" diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 2fc1e17d2f1198..01959bdcee212c 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -620,11 +620,6 @@ def _format_batch_size_and_grad_accum_config(self): ) self.config["gradient_accumulation_steps"] = self.lightning_module.trainer.accumulate_grad_batches if "train_micro_batch_size_per_gpu" not in self.config: - rank_zero_warn( - "Inferring the batch size for internal deepspeed logging from the `train_dataloader()`. " - "If you require skipping this, please pass " - "`Trainer(strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=batch_size))`" - ) batch_size = self._auto_select_batch_size() self.config["train_micro_batch_size_per_gpu"] = batch_size if "gradient_clipping" not in self.config: @@ -636,9 +631,19 @@ def _auto_select_batch_size(self): batch_size = 1 train_dl_source = self.lightning_module.trainer._data_connector._train_dataloader_source if train_dl_source.is_defined(): - train_dataloader = train_dl_source.dataloader() - if hasattr(train_dataloader, "batch_sampler"): - batch_size = train_dataloader.batch_sampler.batch_size + try: + train_dataloader = train_dl_source.dataloader() + if hasattr(train_dataloader, "batch_sampler"): + batch_size = train_dataloader.batch_sampler.batch_size + # broad exception on purpose as `source.dataloader()` will fail if the dataloader requires `setup` + # to have been called before + except Exception: + if self.global_rank == 0: + deepspeed.utils.logging.logger.warning( + "Tried to infer the batch size for internal deepspeed logging from the `train_dataloader()`. 
" + "To ensure DeepSpeed logging remains correct, please manually pass the plugin with the " + "batch size, `Trainer(strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=batch_size))`." + ) return batch_size def _format_precision_config(self): diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 160656d43b7c03..26e2f381e63002 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -238,21 +238,25 @@ def to_tensor(x): args = apply_to_collection(args, dtype=(int, float), function=to_tensor) return args - def training_step(self, *args, **kwargs): + def _step(self, stage: RunningStage, *args: Any, **kwargs: Any): args = self._prepare_input(args) - return self.poptorch_models[RunningStage.TRAINING](*args, **kwargs) + poptorch_model = self.poptorch_models[stage] + self.lightning_module._running_torchscript = True + out = poptorch_model(*args, **kwargs) + self.lightning_module._running_torchscript = False + return out + + def training_step(self, *args, **kwargs): + return self._step(RunningStage.TRAINING, *args, **kwargs) def validation_step(self, *args, **kwargs): - args = self._prepare_input(args) - return self.poptorch_models[RunningStage.VALIDATING](*args, **kwargs) + return self._step(RunningStage.VALIDATING, *args, **kwargs) def test_step(self, *args, **kwargs): - args = self._prepare_input(args) - return self.poptorch_models[RunningStage.TESTING](*args, **kwargs) + return self._step(RunningStage.TESTING, *args, **kwargs) def predict_step(self, *args, **kwargs): - args = self._prepare_input(args) - return self.poptorch_models[RunningStage.PREDICTING](*args, **kwargs) + return self._step(RunningStage.PREDICTING, *args, **kwargs) def teardown(self) -> None: # undo dataloader patching diff --git a/pytorch_lightning/trainer/connectors/callback_connector.py b/pytorch_lightning/trainer/connectors/callback_connector.py index 4d41734ed90e6d..6a54e973ffcf3d 100644 --- a/pytorch_lightning/trainer/connectors/callback_connector.py +++ b/pytorch_lightning/trainer/connectors/callback_connector.py @@ -94,12 +94,9 @@ def on_trainer_init( " bar pass `enable_progress_bar = False` to the Trainer." 
) - if enable_progress_bar: - self.trainer._progress_bar_callback = self.configure_progress_bar( - progress_bar_refresh_rate, process_position - ) - else: - self.trainer._progress_bar_callback = None + self.trainer._progress_bar_callback = self.configure_progress_bar( + progress_bar_refresh_rate, process_position, enable_progress_bar + ) # configure the ModelSummary callback self._configure_model_summary_callback(enable_model_summary, weights_summary) @@ -215,7 +212,9 @@ def _configure_swa_callbacks(self): if not existing_swa: self.trainer.callbacks = [StochasticWeightAveraging()] + self.trainer.callbacks - def configure_progress_bar(self, refresh_rate=None, process_position=0): + def configure_progress_bar( + self, refresh_rate: Optional[int] = None, process_position: int = 0, enable_progress_bar: bool = True + ) -> Optional[ProgressBarBase]: if os.getenv("COLAB_GPU") and refresh_rate is None: # smaller refresh rate on colab causes crashes, choose a higher value refresh_rate = 20 @@ -229,7 +228,12 @@ def configure_progress_bar(self, refresh_rate=None, process_position=0): ) if len(progress_bars) == 1: progress_bar_callback = progress_bars[0] - elif refresh_rate > 0: + if not enable_progress_bar: + raise MisconfigurationException( + "Trainer was configured with `enable_progress_bar=False`" + f" but found `{progress_bar_callback.__class__.__name__}` in callbacks list." + ) + elif refresh_rate > 0 and enable_progress_bar: progress_bar_callback = TQDMProgressBar(refresh_rate=refresh_rate, process_position=process_position) self.trainer.callbacks.append(progress_bar_callback) else: diff --git a/pytorch_lightning/trainer/connectors/data_connector.py b/pytorch_lightning/trainer/connectors/data_connector.py index 90c398087578de..de81060ba1f805 100644 --- a/pytorch_lightning/trainer/connectors/data_connector.py +++ b/pytorch_lightning/trainer/connectors/data_connector.py @@ -64,7 +64,6 @@ def on_trainer_init( self, check_val_every_n_epoch: int, reload_dataloaders_every_n_epochs: int, - reload_dataloaders_every_epoch: bool, prepare_data_per_node: Optional[bool] = None, ) -> None: self.trainer.datamodule = None @@ -83,13 +82,6 @@ def on_trainer_init( self.trainer.check_val_every_n_epoch = check_val_every_n_epoch - if reload_dataloaders_every_epoch: - reload_dataloaders_every_n_epochs = int(reload_dataloaders_every_epoch) - rank_zero_deprecation( - "`reload_dataloaders_every_epoch` is deprecated in v1.4 and will be removed in v1.6." - " Please use `reload_dataloaders_every_n_epochs` in Trainer." - ) - if not isinstance(reload_dataloaders_every_n_epochs, int) or (reload_dataloaders_every_n_epochs < 0): raise MisconfigurationException( f"`reload_dataloaders_every_n_epochs` should be an int >= 0, got {reload_dataloaders_every_n_epochs}." 
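Usage sketch (not part of this patch; it mirrors the new assertion added to `tests/callbacks/test_tqdm_progress_bar.py` further down): with the `configure_progress_bar` change above, explicitly passing a progress bar callback while the progress bar is disabled now fails fast instead of being silently ignored.

import pytest

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import TQDMProgressBar
from pytorch_lightning.utilities.exceptions import MisconfigurationException

# A progress bar callback is passed explicitly although `enable_progress_bar=False`:
# the callback connector now raises instead of keeping or silently dropping it.
with pytest.raises(MisconfigurationException, match="enable_progress_bar=False"):
    Trainer(callbacks=TQDMProgressBar(), enable_progress_bar=False)
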
diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 931f6a92958ee4..bdc051091b50c7 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -438,8 +438,7 @@ def _reset_eval_dataloader( for loader_i in range(len(dataloaders)): loader = dataloaders[loader_i] - if hasattr(loader, "sampler") and isinstance(loader.sampler, RandomSampler): - + if hasattr(loader, "sampler") and not isinstance(loader.sampler, SequentialSampler): # when overfitting, the dataloader should not have sampler if self.overfit_batches > 0 and mode.evaluating: rank_zero_warn( @@ -591,16 +590,17 @@ def _add_sampler_metadata_collate(dataloader: DataLoader) -> None: @staticmethod def _resolve_overfit_batches(dataloader: Collection[DataLoader]) -> Collection[DataLoader]: - has_random_sampler = False + all_have_sequential_sampler = True - def resolve_had_random_sampler(dataloader: DataLoader): - nonlocal has_random_sampler - if not has_random_sampler: - has_random_sampler = isinstance(dataloader.sampler, RandomSampler) + def resolve_has_no_sequential_sampler(dataloader: DataLoader): + nonlocal all_have_sequential_sampler + all_have_sequential_sampler = all_have_sequential_sampler & isinstance( + dataloader.sampler, SequentialSampler + ) - apply_to_collection(dataloader, DataLoader, resolve_had_random_sampler) + apply_to_collection(dataloader, DataLoader, resolve_has_no_sequential_sampler) - if has_random_sampler: + if not all_have_sequential_sampler: rank_zero_warn( "You requested to overfit but enabled training dataloader shuffling." " We are turning off the training dataloader shuffling for you." diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index bf28642e07cd11..f81ce0396e5bb9 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -84,7 +84,7 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import ExitGracefullyException, MisconfigurationException from pytorch_lightning.utilities.imports import _fault_tolerant_training -from pytorch_lightning.utilities.meta import materialize_module +from pytorch_lightning.utilities.meta import is_on_meta_device, materialize_module from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import ( @@ -162,7 +162,6 @@ def __init__( benchmark: bool = False, deterministic: bool = False, reload_dataloaders_every_n_epochs: int = 0, - reload_dataloaders_every_epoch: bool = False, auto_lr_find: Union[bool, str] = False, replace_sampler_ddp: bool = True, detect_anomaly: bool = False, @@ -341,12 +340,6 @@ def __init__( reload_dataloaders_every_n_epochs: Set to a non-negative integer to reload dataloaders every n epochs. - reload_dataloaders_every_epoch: Set to True to reload dataloaders every epoch. - - .. deprecated:: v1.4 - ``reload_dataloaders_every_epoch`` has been deprecated in v1.4 and will be removed in v1.6. - Please use ``reload_dataloaders_every_n_epochs``. - replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for train sampler and ``shuffle=False`` for val/test sampler. 
If you want to customize it, @@ -515,7 +508,6 @@ def __init__( self._data_connector.on_trainer_init( check_val_every_n_epoch, reload_dataloaders_every_n_epochs, - reload_dataloaders_every_epoch, prepare_data_per_node, ) @@ -1406,10 +1398,21 @@ def _call_setup_hook(self) -> None: def _call_configure_sharded_model(self) -> None: with self.accelerator.model_sharded_context(): - materialize_module(self.lightning_module) + self._handle_meta_model() self.call_hook("configure_sharded_model") self.call_hook("on_configure_sharded_model") + def _handle_meta_model(self) -> None: + if not is_on_meta_device(self.lightning_module): + return + + if isinstance(self.training_type_plugin, DDPSpawnPlugin): + raise MisconfigurationException("LightningModule on meta device isn't supported with spawn.") + + materialize_module(self.lightning_module) + # the trainer reference is lost during materialization + self.lightning_module.trainer = proxy(self) + def _call_teardown_hook(self) -> None: fn = self.state.fn._setup_fn @@ -1783,15 +1786,6 @@ def _should_reload_dl_epoch(self) -> bool: n_epochs = self.reload_dataloaders_every_n_epochs return n_epochs and (not self.current_epoch % n_epochs) - @property - def disable_validation(self) -> bool: - """Check if validation is disabled during training.""" - rank_zero_deprecation( - "`trainer.disable_validation` is deprecated in v1.4 and will be removed in v1.6." - " Use `not trainer.enable_validation` instead." - ) - return not self.enable_validation - @property def enable_validation(self) -> bool: """Check if we should run validation during training.""" diff --git a/pytorch_lightning/utilities/meta.py b/pytorch_lightning/utilities/meta.py index 60e6cc791b7aee..6d3c1d6b5f11bf 100644 --- a/pytorch_lightning/utilities/meta.py +++ b/pytorch_lightning/utilities/meta.py @@ -18,13 +18,14 @@ from functools import partial from itertools import chain from types import ModuleType -from typing import Callable, Dict, Generator, Iterator, List, Optional, Set, Type +from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Type import torch from torch import nn, Tensor from torch.nn import Module from torch.nn.modules.container import ModuleDict, ModuleList, Sequential +import pytorch_lightning as pl from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_10 @@ -191,7 +192,6 @@ def materialize_module(root_module: nn.Module) -> nn.Module: # cache subclasses to optimize the search when resetting the meta device later on. __STORAGE_META__ = {} - __CREATED_MODULES__ = set() @@ -237,45 +237,52 @@ def _set_meta_device() -> None: for subclass in get_all_subclasses(torch.nn.modules.module.Module): - if isinstance(subclass, (Sequential, ModuleList, ModuleDict)): + if subclass in (Sequential, ModuleList, ModuleDict, pl.LightningModule): continue # if a subclass has already been stored, we should use the cache if str(subclass) in __STORAGE_META__: - # reset the class import package to its rightfull state. + # reset the class import package to its rightful state. 
mods, subclass, meta_class = __STORAGE_META__[subclass] for mod in mods: setattr(mod, subclass.__name__, meta_class) continue + class _IsinstanceMetaclass(type(subclass)): + def __instancecheck__(self, instance: Any) -> bool: + """Overrides the ``isinstance`` check on ``_MaterializerModule`` objects.""" + return isinstance(instance, self.__bases__[0]) + # Create a class subclassing current `subclass` overriding its new method. # this will enable use to use `torch.distributed.nn.utils.init_meta` to create a `meta` # version of the current subclass module - class _MetaClass(subclass): + class _MaterializerModule(subclass, metaclass=_IsinstanceMetaclass): @classmethod @contextmanager - def instantiation_context(cls, materialize: bool): + def instantiation_context(cls): _unset_meta_device(from_created=True) yield _set_meta_device_populated(from_created=True) @classmethod def materialize(cls, materialize_fn: Callable): - with cls.instantiation_context(materialize=True): + with cls.instantiation_context(): obj = materialize_fn() return obj @staticmethod def add_subclasses(subclass): - """This is used to unrol the instantion tree while creating the modules.""" - __CREATED_MODULES__.add(subclass) + """This is used to unroll the instantiation tree while creating the modules.""" + # Don't store the LightningModule as skipped from the Meta process. + if subclass != pl.LightningModule: + __CREATED_MODULES__.add(subclass) if subclass.__bases__[0] != torch.nn.modules.module.Module: - _MetaClass.add_subclasses(subclass.__bases__[0]) + _MaterializerModule.add_subclasses(subclass.__bases__[0]) def __new__(cls, *args, **kwargs): subclass = cls.__bases__[0] cls.add_subclasses(subclass) - with cls.instantiation_context(materialize=False): + with cls.instantiation_context(): obj = init_meta(subclass, *args, **kwargs) obj.materialize = partial(cls.materialize, materialize_fn=obj.materialize) @@ -294,9 +301,8 @@ def search(mod: ModuleType) -> List[ModuleType]: # nn.Module class can be imported at different level and they all need to be mocked. 
# Example: torch.nn.Linear is actually torch.nn.modules.linear.Linear # Therefore, torch.nn.Linear, torch.nn.modules.Linear, torch.nn.modules.linear.Linear - # needs to be replaced by the torch.nn.linear.modules.Linear _MetaClass - out = [] - out.append(search(mod)) + # needs to be replaced by the torch.nn.linear.modules.Linear _MaterializerModule + out = [search(mod)] for name in submodules[1:]: mod = getattr(mod, name) out.append(search(mod)) @@ -305,11 +311,11 @@ def search(mod: ModuleType) -> List[ModuleType]: mods = [mod for mod in chain(*out) if mod] # store the modules search so it doesn't have to be performed again for this class - __STORAGE_META__[subclass] = (mods, subclass, _MetaClass) + __STORAGE_META__[subclass] = (mods, subclass, _MaterializerModule) # replace all subclass by its meta form for mod in mods: - setattr(mod, subclass.__name__, _MetaClass) + setattr(mod, subclass.__name__, _MaterializerModule) @contextmanager @@ -321,3 +327,11 @@ def init_meta_context() -> Generator: _set_meta_device() yield _unset_meta_device() + + +def is_on_meta_device(module: nn.Module) -> bool: + try: + param = next(module.parameters()) + return param.device.type == "meta" + except StopIteration: + return False diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index e70d862b048e0a..d005c487573302 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -86,7 +86,6 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) -@RunIf(min_gpus=2) @mock.patch.dict( os.environ, { @@ -98,8 +97,10 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): "SLURM_LOCALID": "1", }, ) +@mock.patch("torch.cuda.set_device") +@mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_slurm(setup_distributed_mock): +def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert trainer._accelerator_connector._is_slurm_managing_tasks @@ -111,13 +112,13 @@ def on_fit_start(self, trainer, pl_module): raise SystemExit() model = BoringModel() - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()]) + with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): + trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()]) with pytest.raises(SystemExit): trainer.fit(model) -@RunIf(min_gpus=2) @mock.patch.dict( os.environ, { @@ -129,9 +130,10 @@ def on_fit_start(self, trainer, pl_module): "SLURM_LOCALID": "1", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock): +def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert trainer._accelerator_connector._is_slurm_managing_tasks @@ -143,13 +145,15 @@ def on_fit_start(self, trainer, pl_module): raise SystemExit() model = BoringModel() - trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, 
callbacks=[CB()]) + with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): + trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()]) with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -161,9 +165,10 @@ def on_fit_start(self, trainer, pl_module): "GROUP_RANK": "0", }, ) -@mock.patch("torch.cuda.device_count", return_value=2) +@mock.patch("torch.cuda.set_device") +@mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_te(device_count_mock, setup_distributed_mock): +def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, GPUAccelerator) @@ -174,13 +179,15 @@ def on_fit_start(self, trainer, pl_module): raise SystemExit() model = BoringModel() - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()]) + with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): + trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()]) with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -192,9 +199,10 @@ def on_fit_start(self, trainer, pl_module): "GROUP_RANK": "0", }, ) -@mock.patch("torch.cuda.device_count", return_value=2) +@mock.patch("torch.cuda.set_device") +@mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_accelerator_choice_ddp2_te(device_count_mock, setup_distributed_mock): +def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, GPUAccelerator) @@ -205,11 +213,14 @@ def on_fit_start(self, trainer, pl_module): raise SystemExit() model = BoringModel() - trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()]) + with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): + trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()]) with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + @mock.patch.dict( os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"} @@ -233,7 +244,6 @@ def on_fit_start(self, trainer, pl_module): trainer.fit(model) -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -245,9 +255,10 @@ def on_fit_start(self, trainer, pl_module): "RANK": "1", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock): +def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, GPUAccelerator) @@ -258,11 +269,14 @@ def on_fit_start(self, trainer, pl_module): raise SystemExit() model = BoringModel() - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, 
callbacks=[CB()]) + with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): + trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()]) with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + @mock.patch.dict( os.environ, @@ -323,29 +337,28 @@ def on_fit_start(self, trainer, pl_module): trainer.fit(model) -@RunIf(special=True) -def test_accelerator_choice_ddp_cpu_and_plugin(tmpdir): +@RunIf(skip_windows=True, special=True) +def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir): """Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin.""" - _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPPlugin) + _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPPlugin) -@RunIf(special=True) -def test_accelerator_choice_ddp_cpu_and_plugin_spawn(tmpdir): +@RunIf(skip_windows=True) +def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir): """Test that accelerator="ddp_cpu" can work together with an instance of DDPPSpawnPlugin.""" - _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPSpawnPlugin) - + _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin) -def _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class): +def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class): model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, - plugins=[ddp_plugin_class(find_unused_parameters=True)], + strategy=ddp_strategy_class(find_unused_parameters=True), fast_dev_run=True, accelerator="ddp_cpu", num_processes=2, ) - assert isinstance(trainer.training_type_plugin, ddp_plugin_class) + assert isinstance(trainer.training_type_plugin, ddp_strategy_class) assert isinstance(trainer.accelerator, CPUAccelerator) assert trainer.training_type_plugin.num_processes == 2 assert trainer.training_type_plugin.parallel_devices == [torch.device("cpu")] * 2 @@ -793,7 +806,6 @@ def on_fit_start(self, trainer, pl_module): trainer.fit(model) -@RunIf(min_gpus=2) @mock.patch.dict( os.environ, { @@ -805,10 +817,11 @@ def on_fit_start(self, trainer, pl_module): "SLURM_LOCALID": "1", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) @pytest.mark.parametrize("strategy", ["ddp2", DDP2Plugin()]) -def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock, strategy): +def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert trainer._accelerator_connector._is_slurm_managing_tasks @@ -825,8 +838,9 @@ def on_fit_start(self, trainer, pl_module): with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -838,9 +852,10 @@ def on_fit_start(self, trainer, pl_module): "GROUP_RANK": "0", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_strategy_choice_ddp_te(device_count_mock, setup_distributed_mock): +def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, 
GPUAccelerator) @@ -856,8 +871,9 @@ def on_fit_start(self, trainer, pl_module): with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -869,9 +885,10 @@ def on_fit_start(self, trainer, pl_module): "GROUP_RANK": "0", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_strategy_choice_ddp2_te(device_count_mock, setup_distributed_mock): +def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, GPUAccelerator) @@ -887,6 +904,8 @@ def on_fit_start(self, trainer, pl_module): with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + @mock.patch.dict( os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"} @@ -910,7 +929,6 @@ def on_fit_start(self, trainer, pl_module): trainer.fit(model) -@RunIf(min_gpus=1) @mock.patch.dict( os.environ, { @@ -922,9 +940,10 @@ def on_fit_start(self, trainer, pl_module): "RANK": "1", }, ) +@mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True) -def test_strategy_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock): +def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): assert isinstance(trainer.accelerator, GPUAccelerator) @@ -940,6 +959,8 @@ def on_fit_start(self, trainer, pl_module): with pytest.raises(SystemExit): trainer.fit(model) + set_device_mock.assert_called_once() + @mock.patch.dict( os.environ, diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index da200cc336504d..ffc8ee2ae0846f 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -381,7 +381,7 @@ def on_train_end(self) -> None: _ES_CHECK = dict(check_on_train_epoch_end=True) _ES_CHECK_P3 = dict(patience=3, check_on_train_epoch_end=True) -_NO_WIN = dict(marks=RunIf(skip_windows=True)) +_SPAWN_MARK = dict(marks=RunIf(skip_windows=True, skip_49370=True)) @pytest.mark.parametrize( @@ -389,8 +389,8 @@ def on_train_end(self) -> None: [ ([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, None, 1), ([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, None, 1), - pytest.param([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, "ddp_spawn", 2, **_NO_WIN), - pytest.param([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, "ddp_spawn", 2, **_NO_WIN), + pytest.param([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, "ddp_spawn", 2, **_SPAWN_MARK), + pytest.param([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, "ddp_spawn", 2, **_SPAWN_MARK), ([EarlyStopping("abc", **_ES_CHECK), EarlyStopping("cba", **_ES_CHECK_P3)], 3, True, None, 1), ([EarlyStopping("cba", **_ES_CHECK_P3), EarlyStopping("abc", **_ES_CHECK)], 3, True, None, 1), pytest.param( @@ -399,7 +399,7 @@ def on_train_end(self) -> None: True, "ddp_spawn", 2, - **_NO_WIN, + **_SPAWN_MARK, ), pytest.param( [EarlyStopping("cba", **_ES_CHECK_P3), EarlyStopping("abc", **_ES_CHECK)], @@ -407,7 
+407,7 @@ def on_train_end(self) -> None: True, "ddp_spawn", 2, - **_NO_WIN, + **_SPAWN_MARK, ), ], ) diff --git a/tests/callbacks/test_pruning.py b/tests/callbacks/test_pruning.py index 1c1f84b5b95a0e..c813ed2b02e28b 100644 --- a/tests/callbacks/test_pruning.py +++ b/tests/callbacks/test_pruning.py @@ -187,7 +187,7 @@ def test_pruning_callback_ddp_spawn(tmpdir): train_with_pruning_callback(tmpdir, use_global_unstructured=True, strategy="ddp_spawn", gpus=2) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_pruning_callback_ddp_cpu(tmpdir): train_with_pruning_callback(tmpdir, parameters_to_prune=True, strategy="ddp_spawn", num_processes=2) diff --git a/tests/callbacks/test_stochastic_weight_avg.py b/tests/callbacks/test_stochastic_weight_avg.py index 8bed31bc48ac80..c7186e819ea944 100644 --- a/tests/callbacks/test_stochastic_weight_avg.py +++ b/tests/callbacks/test_stochastic_weight_avg.py @@ -148,7 +148,7 @@ def test_swa_callback_ddp_spawn(tmpdir): train_with_swa(tmpdir, strategy="ddp_spawn", gpus=2) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_swa_callback_ddp_cpu(tmpdir): train_with_swa(tmpdir, strategy="ddp_spawn", num_processes=2) diff --git a/tests/callbacks/test_tqdm_progress_bar.py b/tests/callbacks/test_tqdm_progress_bar.py index 99fe02ce21a112..a8371591759d7b 100644 --- a/tests/callbacks/test_tqdm_progress_bar.py +++ b/tests/callbacks/test_tqdm_progress_bar.py @@ -14,7 +14,7 @@ import os import pickle import sys -from typing import Optional, Union +from typing import Union from unittest import mock from unittest.mock import ANY, call, Mock @@ -32,65 +32,54 @@ @pytest.mark.parametrize( - "callbacks,refresh_rate", + "kwargs", [ - ([], None), - ([], 1), - ([], 2), - ([TQDMProgressBar(refresh_rate=1)], 0), - ([TQDMProgressBar(refresh_rate=2)], 0), - ([TQDMProgressBar(refresh_rate=2)], 1), + # won't print but is still set + {"callbacks": TQDMProgressBar(refresh_rate=0)}, + {"callbacks": TQDMProgressBar()}, + {"progress_bar_refresh_rate": 1}, ], ) -def test_tqdm_progress_bar_on(tmpdir, callbacks: list, refresh_rate: Optional[int]): +def test_tqdm_progress_bar_on(tmpdir, kwargs): """Test different ways the progress bar can be turned on.""" - - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=callbacks, - progress_bar_refresh_rate=refresh_rate, - max_epochs=1, - overfit_batches=5, - ) + if "progress_bar_refresh_rate" in kwargs: + with pytest.deprecated_call(match=r"progress_bar_refresh_rate=.*` is deprecated"): + trainer = Trainer(default_root_dir=tmpdir, **kwargs) + else: + trainer = Trainer(default_root_dir=tmpdir, **kwargs) progress_bars = [c for c in trainer.callbacks if isinstance(c, ProgressBarBase)] - # Trainer supports only a single progress bar callback at the moment assert len(progress_bars) == 1 assert progress_bars[0] is trainer.progress_bar_callback -@pytest.mark.parametrize( - "callbacks,refresh_rate,enable_progress_bar", - [([], 0, True), ([], False, True), ([ModelCheckpoint(dirpath="../trainer")], 0, True), ([], 1, False)], -) -def test_tqdm_progress_bar_off(tmpdir, callbacks: list, refresh_rate: Union[bool, int], enable_progress_bar: bool): +@pytest.mark.parametrize("kwargs", [{"enable_progress_bar": False}, {"progress_bar_refresh_rate": 0}]) +def test_tqdm_progress_bar_off(tmpdir, kwargs): """Test different ways the progress bar can be turned off.""" - - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=callbacks, - progress_bar_refresh_rate=refresh_rate, - 
enable_progress_bar=enable_progress_bar, - ) - - progress_bars = [c for c in trainer.callbacks if isinstance(c, TQDMProgressBar)] - assert 0 == len(progress_bars) - assert not trainer.progress_bar_callback + if "progress_bar_refresh_rate" in kwargs: + pytest.deprecated_call(match=r"progress_bar_refresh_rate=.*` is deprecated").__enter__() + trainer = Trainer(default_root_dir=tmpdir, **kwargs) + progress_bars = [c for c in trainer.callbacks if isinstance(c, ProgressBarBase)] + assert not len(progress_bars) def test_tqdm_progress_bar_misconfiguration(): """Test that Trainer doesn't accept multiple progress bars.""" + # Trainer supports only a single progress bar callback at the moment callbacks = [TQDMProgressBar(), TQDMProgressBar(), ModelCheckpoint(dirpath="../trainer")] with pytest.raises(MisconfigurationException, match=r"^You added multiple progress bar callbacks"): Trainer(callbacks=callbacks) + with pytest.raises(MisconfigurationException, match=r"enable_progress_bar=False` but found `TQDMProgressBar"): + Trainer(callbacks=TQDMProgressBar(), enable_progress_bar=False) + def test_tqdm_progress_bar_totals(tmpdir): """Test that the progress finishes with the correct total steps processed.""" model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, progress_bar_refresh_rate=1, max_epochs=1) + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1) bar = trainer.progress_bar_callback assert float("inf") == bar.total_train_batches assert 0 == bar.total_val_batches @@ -209,14 +198,15 @@ def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, datal self.test_batches_seen += 1 progress_bar = CurrentProgressBar(refresh_rate=refresh_rate) - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=[progress_bar], - progress_bar_refresh_rate=101, # should not matter if custom callback provided - limit_train_batches=1.0, - num_sanity_val_steps=2, - max_epochs=3, - ) + with pytest.deprecated_call(match=r"progress_bar_refresh_rate=101\)` is deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + callbacks=[progress_bar], + progress_bar_refresh_rate=101, # should not matter if custom callback provided + limit_train_batches=1.0, + num_sanity_val_steps=2, + max_epochs=3, + ) assert trainer.progress_bar_callback.refresh_rate == refresh_rate trainer.fit(model) @@ -276,9 +266,6 @@ def test_tqdm_progress_bar_default_value(tmpdir): trainer = Trainer(default_root_dir=tmpdir) assert trainer.progress_bar_callback.refresh_rate == 1 - trainer = Trainer(default_root_dir=tmpdir, progress_bar_refresh_rate=None) - assert trainer.progress_bar_callback.refresh_rate == 1 - @mock.patch.dict(os.environ, {"COLAB_GPU": "1"}) def test_tqdm_progress_bar_value_on_colab(tmpdir): @@ -286,10 +273,14 @@ def test_tqdm_progress_bar_value_on_colab(tmpdir): trainer = Trainer(default_root_dir=tmpdir) assert trainer.progress_bar_callback.refresh_rate == 20 - trainer = Trainer(default_root_dir=tmpdir, progress_bar_refresh_rate=None) - assert trainer.progress_bar_callback.refresh_rate == 20 + trainer = Trainer(default_root_dir=tmpdir, callbacks=TQDMProgressBar()) + assert trainer.progress_bar_callback.refresh_rate == 1 # FIXME: should be 20 + + trainer = Trainer(default_root_dir=tmpdir, callbacks=TQDMProgressBar(refresh_rate=19)) + assert trainer.progress_bar_callback.refresh_rate == 19 - trainer = Trainer(default_root_dir=tmpdir, progress_bar_refresh_rate=19) + with pytest.deprecated_call(match=r"progress_bar_refresh_rate=19\)` is deprecated"): + trainer = Trainer(default_root_dir=tmpdir, 
progress_bar_refresh_rate=19) assert trainer.progress_bar_callback.refresh_rate == 19 diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 518d67cf251f5a..04255d51ad0699 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -385,7 +385,7 @@ def on_train_end(self, trainer, pl_module): assert torch.save.call_count == 0 -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_model_checkpoint_no_extraneous_invocations(tmpdir): """Test to ensure that the model callback saves the checkpoints only once in distributed mode.""" model = LogInTwoMethods() diff --git a/tests/checkpointing/test_torch_saving.py b/tests/checkpointing/test_torch_saving.py index 8b0f0e457bff97..f9634a9dadb2ab 100644 --- a/tests/checkpointing/test_torch_saving.py +++ b/tests/checkpointing/test_torch_saving.py @@ -34,7 +34,7 @@ def test_model_torch_save(tmpdir): trainer = torch.load(temp_path) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_model_torch_save_ddp_cpu(tmpdir): """Test to ensure torch save does not fail for model and trainer using cpu ddp.""" model = BoringModel() diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py deleted file mode 100644 index efb288a623d6af..00000000000000 --- a/tests/deprecated_api/test_remove_1-6.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Test deprecated functionality which will be removed in v1.6.0.""" -from unittest.mock import call, Mock - -import pytest - -from pytorch_lightning import Trainer -from tests.helpers import BoringModel - - -def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): - model = BoringModel() - - tracker = Mock() - model.train_dataloader = Mock(wraps=model.train_dataloader) - model.val_dataloader = Mock(wraps=model.val_dataloader) - model.test_dataloader = Mock(wraps=model.test_dataloader) - - tracker.attach_mock(model.train_dataloader, "train_dataloader") - tracker.attach_mock(model.val_dataloader, "val_dataloader") - tracker.attach_mock(model.test_dataloader, "test_dataloader") - - with pytest.deprecated_call(match="`reload_dataloaders_every_epoch` is deprecated in v1.4 and will be removed"): - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=0.3, - limit_val_batches=0.3, - reload_dataloaders_every_epoch=True, - max_epochs=3, - ) - trainer.fit(model) - trainer.test() - - expected_sequence = ( - [call.val_dataloader()] + [call.train_dataloader(), call.val_dataloader()] * 3 + [call.test_dataloader()] - ) - assert tracker.mock_calls == expected_sequence - - -def test_v1_6_0_deprecated_disable_validation(): - trainer = Trainer() - with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): - _ = trainer.disable_validation diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 4da10fb0b666a0..09a8df66a02ccc 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -245,7 +245,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: return super().get_from_queue(queue) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_v1_7_0_deprecate_add_get_queue(tmpdir): model = BoringCallbackDDPSpawnModel() trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, strategy="ddp_spawn") diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index 490e023662f79c..e53d3811f6b345 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -70,6 +70,7 @@ def __new__( fairscale_fully_sharded: bool = False, deepspeed: bool = False, rich: bool = False, + skip_49370: bool = False, **kwargs, ): """ @@ -91,6 +92,7 @@ def __new__( fairscale_fully_sharded: if `fairscale` fully sharded module is required to run the test deepspeed: if `deepspeed` module is required to run the test rich: if `rich` module is required to run the test + skip_49370: Skip the test as it's impacted by https://github.com/pytorch/pytorch/issues/49370. 
kwargs: native pytest.mark.skipif keyword arguments """ conditions = [] @@ -165,6 +167,15 @@ def __new__( conditions.append(not _RICH_AVAILABLE) reasons.append("Rich") + if skip_49370: + # strategy=ddp_spawn, accelerator=cpu, python>=3.9, torch<1.8 does not work + py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + ge_3_9 = Version(py_version) >= Version("3.9") + torch_version = get_distribution("torch").version + old_torch = Version(torch_version) < Version("1.8") + conditions.append(ge_3_9 and old_torch) + reasons.append("Impacted by https://github.com/pytorch/pytorch/issues/49370") + reasons = [rs for cond, rs in zip(conditions, reasons) if cond] return pytest.mark.skipif( *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs diff --git a/tests/lite/test_wrappers.py b/tests/lite/test_wrappers.py index 4993a10c8dbc23..c271d3b3163edb 100644 --- a/tests/lite/test_wrappers.py +++ b/tests/lite/test_wrappers.py @@ -17,6 +17,7 @@ import torch from torch.utils.data.dataloader import DataLoader +from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin from pytorch_lightning.lite import LightningLite from pytorch_lightning.lite.wrappers import _LiteDataLoader, _LiteModule, _LiteOptimizer from tests.helpers.runif import RunIf @@ -65,6 +66,27 @@ def check_autocast(forward_input): assert out.dtype == input_type or out.dtype == torch.get_default_dtype() +@pytest.mark.parametrize( + "device", [torch.device("cpu"), pytest.param(torch.device("cuda", 0), marks=RunIf(min_gpus=1))] +) +@pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) +def test_lite_module_device_dtype_propagation(device, dtype): + """Test that the LiteModule propagates device and dtype properties to its submodules (e.g. 
torchmetrics).""" + + class DeviceModule(DeviceDtypeModuleMixin): + pass + + device_module = DeviceModule() + lite_module = _LiteModule(device_module, Mock()) + lite_module.to(device) + assert device_module.device == device + assert lite_module.device == device + + lite_module.to(dtype) + assert device_module.dtype == dtype + assert lite_module.dtype == dtype + + def test_lite_dataloader_iterator(): """Test that the iteration over a LiteDataLoader wraps the iterator of the underlying dataloader (no automatic device placement).""" diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index 271ffce811fe54..370b24431b088b 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -321,8 +321,8 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): assert pl_module.logger.experiment.something(foo="bar") is None +@RunIf(skip_windows=True, skip_49370=True) @pytest.mark.parametrize("logger_class", [CometLogger, CSVLogger, MLFlowLogger, TensorBoardLogger, TestTubeLogger]) -@RunIf(skip_windows=True) def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0.""" _patch_comet_atexit(monkeypatch) diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index 63a2211934ece8..6bd7db1aeff8d8 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -791,7 +791,7 @@ def val_dataloader(self): max_epochs=1, val_check_interval=val_check_interval, num_sanity_val_steps=0, - progress_bar_refresh_rate=0, + enable_progress_bar=False, ) trainer.fit(model) @@ -829,7 +829,7 @@ def val_dataloader(self): max_epochs=1, val_check_interval=val_check_interval, num_sanity_val_steps=0, - progress_bar_refresh_rate=0, + enable_progress_bar=False, ) with pytest.raises(CustomException): # will stop during validation @@ -880,7 +880,7 @@ def val_dataloader(self): max_epochs=1, val_check_interval=val_check_interval, num_sanity_val_steps=0, - progress_bar_refresh_rate=0, + enable_progress_bar=False, ) trainer.fit(model, ckpt_path=ckpt_path) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index 2fb537b1d2861d..c110f3a83d815d 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -122,7 +122,7 @@ def validation_step(self, *args, **kwargs): model.unfreeze() -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_multi_cpu_model_ddp(tmpdir): """Make sure DDP works.""" tutils.set_random_main_port() diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 6b34553ff313be..b55e8344ef146f 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -866,7 +866,7 @@ def call(hook, fn, *args, **kwargs): limit_predict_batches=batches, enable_progress_bar=False, enable_model_summary=False, - reload_dataloaders_every_epoch=True, + reload_dataloaders_every_n_epochs=True, ) called = [] diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index abf5a347574245..59a22cf1656d18 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -66,7 +66,7 @@ def _run_horovod(trainer_options, on_gpu=False): assert exit_code == 0 -@RunIf(skip_windows=True, horovod=True) +@RunIf(skip_windows=True, horovod=True, skip_49370=True) def test_horovod_cpu(tmpdir): """Test Horovod running multi-process on CPU.""" trainer_options = dict( @@ -82,7 +82,7 @@ def test_horovod_cpu(tmpdir): _run_horovod(trainer_options) -@RunIf(skip_windows=True, horovod=True) +@RunIf(skip_windows=True, 
horovod=True, skip_49370=True) def test_horovod_cpu_clip_grad_by_value(tmpdir): """Test Horovod running multi-process on CPU.""" trainer_options = dict( @@ -99,7 +99,7 @@ def test_horovod_cpu_clip_grad_by_value(tmpdir): _run_horovod(trainer_options) -@RunIf(skip_windows=True, horovod=True) +@RunIf(skip_windows=True, horovod=True, skip_49370=True) def test_horovod_cpu_implicit(tmpdir): """Test Horovod without specifying a backend, inferring from env set by `horovodrun`.""" trainer_options = dict( diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index c389cf9290c783..c5e5f7ccda7485 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -46,7 +46,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: return super().get_from_queue(queue) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_ddp_cpu(): """Tests if device is set correctly when training for DDPSpawnPlugin.""" trainer = Trainer(num_processes=2, fast_dev_run=True) @@ -91,7 +91,7 @@ def get_from_queue(self, trainer: Trainer, queue: torch.multiprocessing.SimpleQu return super().get_from_queue(trainer, queue) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_ddp_spawn_add_get_queue(tmpdir): """Tests add_to_queue/get_from_queue with DDPSpawnPlugin.""" @@ -128,7 +128,7 @@ def on_predict_start(self) -> None: assert isinstance(self.trainer.model, LightningModule) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_ddp_spawn_configure_ddp(tmpdir): """Tests with ddp spawn plugin.""" trainer = Trainer(default_root_dir=tmpdir, num_processes=2, strategy="ddp_spawn", fast_dev_run=True) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 8804348c8f9155..836c0f685195fe 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -1,5 +1,6 @@ import contextlib import json +import logging import os from typing import Any, Dict, Optional from unittest import mock @@ -872,24 +873,9 @@ def training_step(self, batch, batch_idx): trainer.fit(model) -@RunIf(min_gpus=1, deepspeed=True, special=True) -def test_deepspeed_warn_train_dataloader_called(tmpdir): - """Test DeepSpeed warns when it calls ``lightning_module.train_dataloader`` internally for logging batch - size.""" - model = BoringModel() - trainer = Trainer( - default_root_dir=tmpdir, - strategy=DeepSpeedPlugin(), - gpus=1, - fast_dev_run=True, - ) - with pytest.warns(UserWarning, match="Inferring the batch size for internal deepspeed logging"): - trainer.fit(model) - - @RunIf(min_gpus=1, deepspeed=True, special=True) def test_deepspeed_setup_train_dataloader(tmpdir): - """Test DeepSpeed works when setup is required to call, and the user passes the batch size manually.""" + """Test DeepSpeed works when setup is required to call in the DataModule.""" class TestSetupIsCalledDataModule(LightningDataModule): def __init__(self): @@ -914,13 +900,14 @@ def test_dataloader(self): model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, - strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=32), + strategy=DeepSpeedPlugin(logging_level=logging.INFO), gpus=1, fast_dev_run=True, ) dm = TestSetupIsCalledDataModule() - trainer.fit(model, datamodule=dm) - trainer.test(model, datamodule=dm) + with mock.patch("deepspeed.utils.logging.logger.warning", autospec=True) as mock_object: + trainer.fit(model, datamodule=dm) + assert 
any("Tried to infer the batch size" in str(arg) for arg in mock_object.call_args_list) @mock.patch("torch.optim.lr_scheduler.StepLR.step", autospec=True) diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py index 37756fcc623517..5b8c3939c7b485 100644 --- a/tests/profiler/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -161,7 +161,7 @@ def test_simple_profiler_with_nonexisting_dirpath(tmpdir): assert nonexisting_tmpdir.join("fit-profiler.txt").exists() -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_simple_profiler_distributed_files(tmpdir): """Ensure the proper files are saved in distributed.""" profiler = SimpleProfiler(dirpath=tmpdir, filename="profiler") @@ -226,6 +226,7 @@ def test_advanced_profiler_iterable_durations(advanced_profiler, action: str, ex np.testing.assert_allclose(recored_total_duration, expected_total_duration, rtol=0.2) +@pytest.mark.flaky(reruns=3) def test_advanced_profiler_overhead(advanced_profiler, n_iter=5): """ensure that the profiler doesn't introduce too much overhead during training.""" for _ in range(n_iter): diff --git a/tests/trainer/connectors/test_callback_connector.py b/tests/trainer/connectors/test_callback_connector.py index 2cb68aa2e95bdc..e3c353c3eb063a 100644 --- a/tests/trainer/connectors/test_callback_connector.py +++ b/tests/trainer/connectors/test_callback_connector.py @@ -22,6 +22,7 @@ LearningRateMonitor, ModelCheckpoint, ModelSummary, + ProgressBarBase, TQDMProgressBar, ) from pytorch_lightning.trainer.connectors.callback_connector import CallbackConnector @@ -143,10 +144,11 @@ def test_attach_model_callbacks(): def _attach_callbacks(trainer_callbacks, model_callbacks): model = LightningModule() model.configure_callbacks = lambda: model_callbacks + has_progress_bar = any(isinstance(cb, ProgressBarBase) for cb in trainer_callbacks + model_callbacks) trainer = Trainer( enable_checkpointing=False, - enable_progress_bar=False, - enable_model_summary=None, + enable_progress_bar=has_progress_bar, + enable_model_summary=False, callbacks=trainer_callbacks, ) trainer.model = model diff --git a/tests/trainer/flags/test_overfit_batches.py b/tests/trainer/flags/test_overfit_batches.py index 76c8b37405b47e..3860d85ec9836d 100644 --- a/tests/trainer/flags/test_overfit_batches.py +++ b/tests/trainer/flags/test_overfit_batches.py @@ -13,13 +13,16 @@ # limitations under the License. 
import pytest import torch +from torch.utils.data.sampler import Sampler, SequentialSampler from pytorch_lightning import Trainer from tests.helpers.boring_model import BoringModel, RandomDataset def test_overfit_multiple_val_loaders(tmpdir): - """Tests that only training_step can be used.""" + """Tests that overfit batches works with multiple val dataloaders.""" + val_dl_count = 2 + overfit_batches = 3 class TestModel(BoringModel): def validation_step(self, batch, batch_idx, dataloader_idx): @@ -31,25 +34,65 @@ def validation_epoch_end(self, outputs) -> None: pass def val_dataloader(self): - dl1 = torch.utils.data.DataLoader(RandomDataset(32, 64)) - dl2 = torch.utils.data.DataLoader(RandomDataset(32, 64)) - return [dl1, dl2] + dls = [torch.utils.data.DataLoader(RandomDataset(32, 64)) for _ in range(val_dl_count)] + return dls model = TestModel() trainer = Trainer( - default_root_dir=tmpdir, max_epochs=2, overfit_batches=1, log_every_n_steps=1, enable_model_summary=False + default_root_dir=tmpdir, + max_epochs=2, + overfit_batches=overfit_batches, + log_every_n_steps=1, + enable_model_summary=False, ) trainer.fit(model) + assert trainer.num_training_batches == overfit_batches + assert len(trainer.num_val_batches) == val_dl_count + assert all(nbatches == overfit_batches for nbatches in trainer.num_val_batches) -@pytest.mark.parametrize("overfit", [1, 2, 0.1, 0.25, 1.0]) -def test_overfit_basic(tmpdir, overfit): - """Tests that only training_step can be used.""" +@pytest.mark.parametrize("overfit_batches", [1, 2, 0.1, 0.25, 1.0]) +def test_overfit_basic(tmpdir, overfit_batches): + """Tests that only training_step can be used when overfitting.""" model = BoringModel() + model.validation_step = None + total_train_samples = len(BoringModel().train_dataloader()) - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, overfit_batches=overfit, enable_model_summary=False) - + trainer = Trainer( + default_root_dir=tmpdir, max_epochs=1, overfit_batches=overfit_batches, enable_model_summary=False + ) trainer.fit(model) + + assert trainer.num_val_batches == [] + assert trainer.num_training_batches == int( + overfit_batches * (1 if isinstance(overfit_batches, int) else total_train_samples) + ) + + +def test_overfit_batches_raises_warning_in_case_of_sequential_sampler(tmpdir): + class NonSequentialSampler(Sampler): + def __init__(self, data_source): + self.data_source = data_source + + def __iter__(self): + return iter(range(len(self.data_source))) + + def __len__(self): + return len(self.data_source) + + class TestModel(BoringModel): + def train_dataloader(self): + dataset = RandomDataset(32, 64) + sampler = NonSequentialSampler(dataset) + return torch.utils.data.DataLoader(dataset, sampler=sampler) + + model = TestModel() + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, overfit_batches=2) + + with pytest.warns(UserWarning, match="requested to overfit but enabled training dataloader shuffling"): + trainer.fit(model) + + assert isinstance(trainer.train_dataloader.loaders.sampler, SequentialSampler) diff --git a/tests/trainer/logging_/test_distributed_logging.py b/tests/trainer/logging_/test_distributed_logging.py index 487b7f38e4e197..d4ba4f242294ac 100644 --- a/tests/trainer/logging_/test_distributed_logging.py +++ b/tests/trainer/logging_/test_distributed_logging.py @@ -59,7 +59,7 @@ def on_train_end(self): assert self.log_name.format(rank=self.local_rank) in self.logger.logs, "Expected rank to be logged" -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def 
test_all_rank_logging_ddp_cpu(tmpdir): """Check that all ranks can be logged from.""" model = TestModel() diff --git a/tests/trainer/logging_/test_train_loop_logging.py b/tests/trainer/logging_/test_train_loop_logging.py index 5b775b9968d993..22a1a2c90d7562 100644 --- a/tests/trainer/logging_/test_train_loop_logging.py +++ b/tests/trainer/logging_/test_train_loop_logging.py @@ -395,7 +395,7 @@ def validation_step(self, batch, batch_idx): return super().validation_step(batch, batch_idx) -@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(skip_windows=True))]) +@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(skip_windows=True, skip_49370=True))]) def test_logging_sync_dist_true(tmpdir, devices): """Tests to ensure that the sync_dist flag works (should just return the original value)""" fake_result = 1 diff --git a/tests/trainer/properties/test_get_model.py b/tests/trainer/properties/test_get_model.py index 6e405739e83fe9..ed81b90a2d1423 100644 --- a/tests/trainer/properties/test_get_model.py +++ b/tests/trainer/properties/test_get_model.py @@ -37,7 +37,7 @@ def test_get_model(tmpdir): trainer.fit(model) -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_get_model_ddp_cpu(tmpdir): """Tests that `trainer.lightning_module` extracts the model correctly when using ddp on cpu.""" diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 4f3a482e37ac47..edf40ac61d5ddb 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -133,7 +133,7 @@ def _get_warning_msg(): assert warn_str in msg -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) @pytest.mark.parametrize("num_workers", [0, 1]) def test_dataloader_warnings(tmpdir, num_workers): trainer = Trainer(default_root_dir=tmpdir, strategy="ddp_spawn", num_processes=2, fast_dev_run=4) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 1ffc957659ef01..272078b1d42064 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -1276,7 +1276,7 @@ def validation_step(self, batch, batch_idx): # the val dataloader on the first epoch because this only tracks the training epoch # meaning multiple passes through the validation data within a single training epoch # would not have the dataloader reloaded. 
- # This breaks the assumption behind reload_dataloaders_every_epoch=True + # This breaks the assumption behind reload_dataloaders_every_n_epochs=True call.val_dataloader(), call.train_dataloader(), call.val_dataloader(), diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index dc0ce2b68452c8..b9fb758321a0b3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1809,7 +1809,7 @@ def on_predict_start(self) -> None: @pytest.mark.parametrize( - "strategy,num_processes", [(None, 1), pytest.param("ddp_spawn", 2, marks=RunIf(skip_windows=True))] + "strategy,num_processes", [(None, 1), pytest.param("ddp_spawn", 2, marks=RunIf(skip_windows=True, skip_49370=True))] ) def test_model_in_correct_mode_during_stages(tmpdir, strategy, num_processes): model = TrainerStagesModel() @@ -1830,7 +1830,7 @@ def validation_epoch_end(self, outputs) -> None: pass -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, skip_49370=True) def test_fit_test_synchronization(tmpdir): """Test that the trainer synchronizes processes before returning control back to the caller.""" tutils.set_random_main_port() diff --git a/tests/utilities/test_all_gather_grad.py b/tests/utilities/test_all_gather_grad.py index 073468fc4cb289..2ed42b0b0f21a6 100644 --- a/tests/utilities/test_all_gather_grad.py +++ b/tests/utilities/test_all_gather_grad.py @@ -41,8 +41,8 @@ def _test_all_gather_ddp(rank, world_size): assert torch.allclose(grad2, tensor2.grad) -@RunIf(skip_windows=True) -def test_all_gather_ddp(): +@RunIf(skip_windows=True, skip_49370=True) +def test_all_gather_ddp_spawn(): world_size = 3 torch.multiprocessing.spawn(_test_all_gather_ddp, args=(world_size,), nprocs=world_size) diff --git a/tests/utilities/test_meta.py b/tests/utilities/test_meta.py index 8e36a86c3beef0..581b949d9167f5 100644 --- a/tests/utilities/test_meta.py +++ b/tests/utilities/test_meta.py @@ -14,7 +14,7 @@ from torch import nn from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.utilities.meta import init_meta_context, materialize_module +from pytorch_lightning.utilities.meta import init_meta_context, is_on_meta_device, materialize_module from tests.helpers.runif import RunIf @@ -31,18 +31,23 @@ def __init__(self, num_layers: int): self.layer = nn.Sequential(*[nn.Linear(1, 1) for _ in range(self.hparams.num_layers)]) -@RunIf(min_torch="1.10.0") +@RunIf(special=True, min_torch="1.10.0") def test_init_meta_context(): with init_meta_context(): m = nn.Linear(in_features=1, out_features=1) + assert isinstance(m, nn.Linear) assert m.weight.device.type == "meta" + assert is_on_meta_device(m) mlp = MLP(4) assert mlp.layer[0].weight.device.type == "meta" mlp = materialize_module(mlp) assert mlp.layer[0].weight.device.type == "cpu" + assert not is_on_meta_device(mlp) + assert not is_on_meta_device(nn.Module()) + model = BoringModel(4) assert model.layer[0].weight.device.type == "meta" materialize_module(model)