From 6cc2a72cf789c55e221732bd1194db4cbfbfd634 Mon Sep 17 00:00:00 2001 From: vincentme <11166542+vincentme@users.noreply.github.com> Date: Sat, 28 Sep 2024 03:19:43 +0800 Subject: [PATCH 01/12] docs: fix a typo of precision (#20252) (cherry picked from commit 5be58f62719a318b9b96d94e4db52ca6f5eb07fc) --- docs/source-fabric/fundamentals/launch.rst | 2 +- src/lightning/fabric/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source-fabric/fundamentals/launch.rst b/docs/source-fabric/fundamentals/launch.rst index f8c0deecf4e25..81b6cd9d186f1 100644 --- a/docs/source-fabric/fundamentals/launch.rst +++ b/docs/source-fabric/fundamentals/launch.rst @@ -116,7 +116,7 @@ This is essentially the same as running ``python path/to/your/script.py``, but i machine. --precision [16-mixed|bf16-mixed|32-true|64-true|64|32|16|bf16] Double precision (``64-true`` or ``64``), - full precision (``32-true`` or ``64``), half + full precision (``32-true`` or ``32``), half precision (``16-mixed`` or ``16``) or bfloat16 precision (``bf16-mixed`` or ``bf16``) diff --git a/src/lightning/fabric/cli.py b/src/lightning/fabric/cli.py index 5ca46ba331622..7c81afa916196 100644 --- a/src/lightning/fabric/cli.py +++ b/src/lightning/fabric/cli.py @@ -140,7 +140,7 @@ def _main() -> None: type=click.Choice(get_args(_PRECISION_INPUT_STR) + get_args(_PRECISION_INPUT_STR_ALIAS)), default=None, help=( - "Double precision (``64-true`` or ``64``), full precision (``32-true`` or ``64``), " + "Double precision (``64-true`` or ``64``), full precision (``32-true`` or ``32``), " "half precision (``16-mixed`` or ``16``) or bfloat16 precision (``bf16-mixed`` or ``bf16``)" ), ) From 3bf61186d174fa1fb5ff913f9056a340aa8a0867 Mon Sep 17 00:00:00 2001 From: Tianshu Wang Date: Tue, 1 Oct 2024 00:08:45 +0800 Subject: [PATCH 02/12] Make RichProgressBar visible for both light and dark background (#20260) (cherry picked from commit 474bdd0393666616ba6702eafd637275be649276) --- .../callbacks/progress/rich_progress.py | 37 +++---------------- .../progress/test_rich_progress_bar.py | 14 ------- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/src/lightning/pytorch/callbacks/progress/rich_progress.py b/src/lightning/pytorch/callbacks/progress/rich_progress.py index 497e96e11b9c4..896de71267835 100644 --- a/src/lightning/pytorch/callbacks/progress/rich_progress.py +++ b/src/lightning/pytorch/callbacks/progress/rich_progress.py @@ -206,14 +206,14 @@ class RichProgressBarTheme: """ - description: Union[str, "Style"] = "white" + description: Union[str, "Style"] = "" progress_bar: Union[str, "Style"] = "#6206E0" progress_bar_finished: Union[str, "Style"] = "#6206E0" progress_bar_pulse: Union[str, "Style"] = "#6206E0" - batch_progress: Union[str, "Style"] = "white" - time: Union[str, "Style"] = "grey54" - processing_speed: Union[str, "Style"] = "grey70" - metrics: Union[str, "Style"] = "white" + batch_progress: Union[str, "Style"] = "" + time: Union[str, "Style"] = "dim" + processing_speed: Union[str, "Style"] = "dim underline" + metrics: Union[str, "Style"] = "italic" metrics_text_delimiter: str = " " metrics_format: str = ".3f" @@ -280,7 +280,6 @@ def __init__( self._metric_component: Optional[MetricsTextColumn] = None self._progress_stopped: bool = False self.theme = theme - self._update_for_light_colab_theme() @property def refresh_rate(self) -> float: @@ -318,13 +317,6 @@ def test_progress_bar(self) -> "Task": assert self.test_progress_bar_id is not None return self.progress.tasks[self.test_progress_bar_id] - def _update_for_light_colab_theme(self) -> None: - if _detect_light_colab_theme(): - attributes = ["description", "batch_progress", "metrics"] - for attr in attributes: - if getattr(self.theme, attr) == "white": - setattr(self.theme, attr, "black") - @override def disable(self) -> None: self._enabled = False @@ -449,7 +441,7 @@ def on_validation_batch_start( def _add_task(self, total_batches: Union[int, float], description: str, visible: bool = True) -> "TaskID": assert self.progress is not None return self.progress.add_task( - f"[{self.theme.description}]{description}", + f"[{self.theme.description}]{description}" if self.theme.description else description, total=total_batches, visible=visible, ) @@ -656,20 +648,3 @@ def __getstate__(self) -> Dict: state["progress"] = None state["_console"] = None return state - - -def _detect_light_colab_theme() -> bool: - """Detect if it's light theme in Colab.""" - try: - import get_ipython - except (NameError, ModuleNotFoundError): - return False - ipython = get_ipython() - if "google.colab" in str(ipython.__class__): - try: - from google.colab import output - - return output.eval_js('document.documentElement.matches("[theme=light]")') - except ModuleNotFoundError: - return False - return False diff --git a/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py b/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py index de41035d4d832..b8d3d6d36c075 100644 --- a/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py +++ b/tests/tests_pytorch/callbacks/progress/test_rich_progress_bar.py @@ -308,20 +308,6 @@ def test_rich_progress_bar_counter_with_val_check_interval(tmp_path): assert val_bar.total == 4 -@RunIf(rich=True) -@mock.patch("lightning.pytorch.callbacks.progress.rich_progress._detect_light_colab_theme", return_value=True) -def test_rich_progress_bar_colab_light_theme_update(*_): - theme = RichProgressBar().theme - assert theme.description == "black" - assert theme.batch_progress == "black" - assert theme.metrics == "black" - - theme = RichProgressBar(theme=RichProgressBarTheme(description="blue", metrics="red")).theme - assert theme.description == "blue" - assert theme.batch_progress == "black" - assert theme.metrics == "red" - - @RunIf(rich=True) def test_rich_progress_bar_metric_display_task_id(tmp_path): class CustomModel(BoringModel): From d4dfbb747e07f359f19914b2b454d37295c93f3a Mon Sep 17 00:00:00 2001 From: Nishant Dahal <50732732+NishantDahal@users.noreply.github.com> Date: Mon, 30 Sep 2024 22:14:21 +0545 Subject: [PATCH 03/12] docs: add note for `TQDMProgressBar` (#20198) * Add documentation note for TQDMProgressBar --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 66508ff4b7d49264e37d3e8926fa6e39bcb1217c) --- docs/source-pytorch/common/progress_bar.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source-pytorch/common/progress_bar.rst b/docs/source-pytorch/common/progress_bar.rst index e0c29fccdc494..106c2289e5c7b 100644 --- a/docs/source-pytorch/common/progress_bar.rst +++ b/docs/source-pytorch/common/progress_bar.rst @@ -36,6 +36,10 @@ You can update ``refresh_rate`` (rate (number of batches) at which the progress trainer = Trainer(callbacks=[TQDMProgressBar(refresh_rate=10)]) +.. note:: + + The ``smoothing`` option has no effect when using the default implementation of :class:`~lightning.pytorch.callbacks.TQDMProgressBar`, as the progress bar is updated using the ``bar.refresh()`` method instead of ``bar.update()``. This can cause the progress bar to become desynchronized with the actual progress. To avoid this issue, you can use the ``bar.update()`` method instead, but this may require customizing the :class:`~lightning.pytorch.callbacks.TQDMProgressBar` class. + By default the training progress bar is reset (overwritten) at each new epoch. If you wish for a new progress bar to be displayed at the end of every epoch, set :paramref:`TQDMProgressBar.leave ` to ``True``. From 545f4089a53a5368041a992cf61b4edc80f2fa12 Mon Sep 17 00:00:00 2001 From: Ali Alshaarawy <45029495+ali-alshaar7@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:18:43 -0400 Subject: [PATCH 04/12] update BitsandBytes version (#20313) * upggrade requiremnets.txt * update fabric bitsandbytes linear quantization for bnb 0.44.1 * add quant_storage param * exclude macos from bnb upgrade * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 5dea36c5e2969aa8823213d6602e058db093ec57) --- requirements/fabric/strategies.txt | 3 ++- requirements/pytorch/extra.txt | 3 ++- src/lightning/fabric/plugins/precision/bitsandbytes.py | 10 ++++++++-- .../pytorch/plugins/precision/bitsandbytes.py | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/requirements/fabric/strategies.txt b/requirements/fabric/strategies.txt index 4aee89d9f68e7..394aceb39cd6b 100644 --- a/requirements/fabric/strategies.txt +++ b/requirements/fabric/strategies.txt @@ -6,4 +6,5 @@ # note: is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods` # shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372 deepspeed >=0.8.2, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin" # strict -bitsandbytes >=0.42.0,<0.43.0 +bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32' +bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin' diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index 6962da858c4ab..12bbdf5a70ab0 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -8,4 +8,5 @@ hydra-core >=1.2.0, <1.4.0 jsonargparse[signatures] >=4.27.7, <4.28.0 rich >=12.3.0, <13.6.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute -bitsandbytes >=0.42.0,<0.43.0 +bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32' +bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin' diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py index 0f524dd67fad9..394415452890a 100644 --- a/src/lightning/fabric/plugins/precision/bitsandbytes.py +++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py @@ -43,7 +43,7 @@ class BitsandbytesPrecision(Precision): - """Plugin for quantizing weights with `bitsandbytes `__. + """Plugin for quantizing weights with `bitsandbytes `__. .. warning:: This is an :ref:`experimental ` feature. @@ -184,11 +184,15 @@ def _replace_param( if param.device.type == "meta": if isinstance(param, bnb.nn.Params4bit): return bnb.nn.Params4bit( - data, + data=data, requires_grad=data.requires_grad, quant_state=quant_state, + blocksize=param.blocksize, compress_statistics=param.compress_statistics, quant_type=param.quant_type, + quant_storage=param.quant_storage, + module=param.module, + bnb_quantized=param.bnb_quantized, ) return torch.nn.Parameter(data, requires_grad=data.requires_grad) param.data = data @@ -322,6 +326,7 @@ def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torc return assert isinstance(self.weight, bnb.nn.Params4bit) self.weight = self.quantize(self.weight, weight, device) + self.weight.bnb_quantized = True @staticmethod def quantize( @@ -337,6 +342,7 @@ def quantize( blocksize=params4bit.blocksize, compress_statistics=params4bit.compress_statistics, quant_type=params4bit.quant_type, + quant_storage=params4bit.quant_storage, ) return _replace_param(params4bit, w_4bit, quant_state) diff --git a/src/lightning/pytorch/plugins/precision/bitsandbytes.py b/src/lightning/pytorch/plugins/precision/bitsandbytes.py index 62acc7bf77c8d..3a2daa828bc3c 100644 --- a/src/lightning/pytorch/plugins/precision/bitsandbytes.py +++ b/src/lightning/pytorch/plugins/precision/bitsandbytes.py @@ -16,7 +16,7 @@ class BitsandbytesPrecision(Precision, FabricBNBPrecision): - """Plugin for quantizing weights with `bitsandbytes `__. + """Plugin for quantizing weights with `bitsandbytes `__. .. warning:: This is an :ref:`experimental ` feature. From c8c580f2c6188b07b97f709621ff5e57846b4771 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:53:45 +0200 Subject: [PATCH 05/12] fix(lint): emergency bump `docformatter` (#20352) (cherry picked from commit af19dda05c43ad99f0a00d5728bf97c18b7bbd78) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb7604831767b..24fc40566b152 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,7 +58,7 @@ repos: #args: ["--write-changes"] # uncomment if you want to get automatic fixing - repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 + rev: 06907d0267368b49b9180eed423fae5697c1e909 # todo: fix for docformatter after last 1.7.5 hooks: - id: docformatter additional_dependencies: [tomli] From 539e23865b9a69a934060251b26c3f2b6ef1a484 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:47:30 +0200 Subject: [PATCH 06/12] docs: fix removed ref to `deepspeed.initialize` (#20353) * docs: fix removed ref to `deepspeed.initialize` * fix links (cherry picked from commit 0e1e14f8158e75e2b9b9b78022645932fded5a12) --- docs/source-pytorch/accelerators/tpu_advanced.rst | 4 ++-- docs/source-pytorch/accelerators/tpu_basic.rst | 4 ++-- docs/source-pytorch/accelerators/tpu_faq.rst | 2 +- src/lightning/fabric/strategies/deepspeed.py | 2 +- src/lightning/fabric/strategies/xla_fsdp.py | 2 +- src/lightning/pytorch/strategies/deepspeed.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source-pytorch/accelerators/tpu_advanced.rst b/docs/source-pytorch/accelerators/tpu_advanced.rst index e410c6e82539f..d74f9b07374c9 100644 --- a/docs/source-pytorch/accelerators/tpu_advanced.rst +++ b/docs/source-pytorch/accelerators/tpu_advanced.rst @@ -52,7 +52,7 @@ Example: model = WeightSharingModule() trainer = Trainer(max_epochs=1, accelerator="tpu") -See `XLA Documentation `_ +See `XLA Documentation `_ ---- @@ -61,4 +61,4 @@ XLA XLA is the library that interfaces PyTorch with the TPUs. For more information check out `XLA `_. -Guide for `troubleshooting XLA `_ +Guide for `troubleshooting XLA `_ diff --git a/docs/source-pytorch/accelerators/tpu_basic.rst b/docs/source-pytorch/accelerators/tpu_basic.rst index fb4e2b7bde244..217b76106aea9 100644 --- a/docs/source-pytorch/accelerators/tpu_basic.rst +++ b/docs/source-pytorch/accelerators/tpu_basic.rst @@ -108,7 +108,7 @@ There are cases in which training on TPUs is slower when compared with GPUs, for - XLA Graph compilation during the initial steps `Reference `_ - Some tensor ops are not fully supported on TPU, or not supported at all. These operations will be performed on CPU (context switch). -The official PyTorch XLA `performance guide `_ +The official PyTorch XLA `performance guide `_ has more detailed information on how PyTorch code can be optimized for TPU. In particular, the -`metrics report `_ allows +`metrics report `_ allows one to identify operations that lead to context switching. diff --git a/docs/source-pytorch/accelerators/tpu_faq.rst b/docs/source-pytorch/accelerators/tpu_faq.rst index f4b2c60633d26..109449ef2cc9a 100644 --- a/docs/source-pytorch/accelerators/tpu_faq.rst +++ b/docs/source-pytorch/accelerators/tpu_faq.rst @@ -78,7 +78,7 @@ A lot of PyTorch operations aren't lowered to XLA, which could lead to significa These operations are moved to the CPU memory and evaluated, and then the results are transferred back to the XLA device(s). By using the `xla_debug` Strategy, users could create a metrics report to diagnose issues. -The report includes things like (`XLA Reference `_): +The report includes things like (`XLA Reference `_): * how many times we issue XLA compilations and time spent on issuing. * how many times we execute and time spent on execution diff --git a/src/lightning/fabric/strategies/deepspeed.py b/src/lightning/fabric/strategies/deepspeed.py index 93a17f10c8998..e71b8e2db3d58 100644 --- a/src/lightning/fabric/strategies/deepspeed.py +++ b/src/lightning/fabric/strategies/deepspeed.py @@ -598,7 +598,7 @@ def _initialize_engine( ) -> Tuple["DeepSpeedEngine", Optimizer]: """Initialize one model and one optimizer with an optional learning rate scheduler. - This calls :func:`deepspeed.initialize` internally. + This calls ``deepspeed.initialize`` internally. """ import deepspeed diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py index 6da693bafb1c8..e4c080d8110db 100644 --- a/src/lightning/fabric/strategies/xla_fsdp.py +++ b/src/lightning/fabric/strategies/xla_fsdp.py @@ -56,7 +56,7 @@ class XLAFSDPStrategy(ParallelStrategy, _Sharded): .. warning:: This is an :ref:`experimental ` feature. - For more information check out https://github.com/pytorch/xla/blob/master/docs/fsdp.md + For more information check out https://github.com/pytorch/xla/blob/v2.5.0/docs/fsdp.md Args: auto_wrap_policy: Same as ``auto_wrap_policy`` parameter in diff --git a/src/lightning/pytorch/strategies/deepspeed.py b/src/lightning/pytorch/strategies/deepspeed.py index 382f8070898f8..1eaa5bab75fbe 100644 --- a/src/lightning/pytorch/strategies/deepspeed.py +++ b/src/lightning/pytorch/strategies/deepspeed.py @@ -414,7 +414,7 @@ def _setup_model_and_optimizer( ) -> Tuple["deepspeed.DeepSpeedEngine", Optimizer]: """Initialize one model and one optimizer with an optional learning rate scheduler. - This calls :func:`deepspeed.initialize` internally. + This calls ``deepspeed.initialize`` internally. """ import deepspeed From 489da5dd0f9a43ccf5364486c04e826b9686615e Mon Sep 17 00:00:00 2001 From: WuJian Date: Mon, 21 Oct 2024 22:11:22 +0800 Subject: [PATCH 07/12] docs: fix pytorch version typo in upgrade/from_2_0 (#20333) (cherry picked from commit 6f86497f5539bbe987a5efe97304f97e7f3a31e6) --- docs/source-pytorch/upgrade/sections/2_0_regular.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source-pytorch/upgrade/sections/2_0_regular.rst b/docs/source-pytorch/upgrade/sections/2_0_regular.rst index 192f20bc669b9..2f94ef7ab66fd 100644 --- a/docs/source-pytorch/upgrade/sections/2_0_regular.rst +++ b/docs/source-pytorch/upgrade/sections/2_0_regular.rst @@ -6,7 +6,7 @@ - Then - Ref - * - used PyTorch 3.11 + * - used PyTorch 1.11 - upgrade to PyTorch 2.1 or higher - `PR18691`_ From cf7cc9ac53bb8190b3d827c1edd965596bb8858a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 16:15:01 +0200 Subject: [PATCH 08/12] build(deps): bump Lightning-AI/utilities from 0.11.7 to 0.11.8 (#20354) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 8c5fc89f39c34f4832ebb89fcfff7bc05c5b2247) --- .github/workflows/call-clear-cache.yml | 8 ++++---- .github/workflows/ci-check-md-links.yml | 2 +- .github/workflows/ci-schema.yml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/call-clear-cache.yml b/.github/workflows/call-clear-cache.yml index 091e6a002ab3c..4c189879fb48c 100644 --- a/.github/workflows/call-clear-cache.yml +++ b/.github/workflows/call-clear-cache.yml @@ -23,18 +23,18 @@ on: jobs: cron-clear: if: github.event_name == 'schedule' || github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: - scripts-ref: v0.11.7 + scripts-ref: v0.11.8 dry-run: ${{ github.event_name == 'pull_request' }} pattern: "latest|docs" age-days: 7 direct-clear: if: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: - scripts-ref: v0.11.7 + scripts-ref: v0.11.8 dry-run: ${{ github.event_name == 'pull_request' }} pattern: ${{ inputs.pattern || 'pypi_wheels' }} # setting str in case of PR / debugging age-days: ${{ fromJSON(inputs.age-days) || 0 }} # setting 0 in case of PR / debugging diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml index 53b06c207482d..af5378c4221e3 100644 --- a/.github/workflows/ci-check-md-links.yml +++ b/.github/workflows/ci-check-md-links.yml @@ -14,7 +14,7 @@ on: jobs: check-md-links: - uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.8 with: config-file: ".github/markdown-links-config.json" base-branch: "master" diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml index e5ae526f196b7..2ccaadd2f51f5 100644 --- a/.github/workflows/ci-schema.yml +++ b/.github/workflows/ci-schema.yml @@ -8,7 +8,7 @@ on: jobs: check: - uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.8 with: # skip azure due to the wrong schema file by MSFT # https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607 From 37d71320f8a7ac1e8cf72ee8d3d4c0cfe40faeb9 Mon Sep 17 00:00:00 2001 From: PL Ghost <75324987+pl-ghost@users.noreply.github.com> Date: Mon, 21 Oct 2024 16:18:56 +0200 Subject: [PATCH 09/12] docs: update ref to latest tutorials (#20322) Co-authored-by: Borda Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 2110a399b1b009bd3902a60dcad2c1bab0a79445) --- _notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_notebooks b/_notebooks index d527353491441..e0cab886b1fcf 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit d5273534914411886ed45d59536f6042d24f6fe0 +Subproject commit e0cab886b1fcfc7928c1498aaa6d2b0832f937a6 From bc34b3bed9a51627dc7539740e0afcbd4c90915c Mon Sep 17 00:00:00 2001 From: PL Ghost <75324987+pl-ghost@users.noreply.github.com> Date: Sun, 3 Nov 2024 08:11:54 +0100 Subject: [PATCH 10/12] docs: update ref to latest tutorials (#20387) update tutorials to `b83fde09` Co-authored-by: Borda (cherry picked from commit 897b2af5efffb51992799ea23930d1d625537637) --- _notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_notebooks b/_notebooks index e0cab886b1fcf..b83fde09c7243 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit e0cab886b1fcfc7928c1498aaa6d2b0832f937a6 +Subproject commit b83fde09c724311af0d528e810b2ba606f31c95e From 24478af975ef51cd4b5b39651698291174faed93 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:33:48 +0000 Subject: [PATCH 11/12] ci: bump deprecated mac 12 to 13 (#20393) (cherry picked from commit 3627c5bfac704d44c0d055a2cdf6f3f9e3f9e8c1) --- .github/checkgroup.yml | 20 ++++++++++---------- .github/workflows/ci-pkg-install.yml | 2 +- .github/workflows/ci-tests-fabric.yml | 2 +- .github/workflows/ci-tests-pytorch.yml | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 20875df42c5a8..fa455da015fce 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -37,7 +37,7 @@ subprojects: - "pl-cpu (macOS-14, pytorch, 3.9, 2.1)" - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.9, 2.1)" - - "pl-cpu (macOS-12, pytorch, 3.10, 2.1)" + - "pl-cpu (macOS-13, pytorch, 3.10, 2.1)" - "pl-cpu (ubuntu-22.04, pytorch, 3.10, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.10, 2.1)" @@ -184,7 +184,7 @@ subprojects: - "fabric-cpu (macOS-14, fabric, 3.9, 2.1)" - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.9, 2.1)" - - "fabric-cpu (macOS-12, fabric, 3.10, 2.1)" + - "fabric-cpu (macOS-13, fabric, 3.10, 2.1)" - "fabric-cpu (ubuntu-22.04, fabric, 3.10, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.10, 2.1)" @@ -258,14 +258,14 @@ subprojects: - "install-pkg (ubuntu-22.04, lightning, 3.11)" - "install-pkg (ubuntu-22.04, notset, 3.9)" - "install-pkg (ubuntu-22.04, notset, 3.11)" - - "install-pkg (macOS-12, fabric, 3.9)" - - "install-pkg (macOS-12, fabric, 3.11)" - - "install-pkg (macOS-12, pytorch, 3.9)" - - "install-pkg (macOS-12, pytorch, 3.11)" - - "install-pkg (macOS-12, lightning, 3.9)" - - "install-pkg (macOS-12, lightning, 3.11)" - - "install-pkg (macOS-12, notset, 3.9)" - - "install-pkg (macOS-12, notset, 3.11)" + - "install-pkg (macOS-13, fabric, 3.9)" + - "install-pkg (macOS-13, fabric, 3.11)" + - "install-pkg (macOS-13, pytorch, 3.9)" + - "install-pkg (macOS-13, pytorch, 3.11)" + - "install-pkg (macOS-13, lightning, 3.9)" + - "install-pkg (macOS-13, lightning, 3.11)" + - "install-pkg (macOS-13, notset, 3.9)" + - "install-pkg (macOS-13, notset, 3.11)" - "install-pkg (windows-2022, fabric, 3.9)" - "install-pkg (windows-2022, fabric, 3.11)" - "install-pkg (windows-2022, pytorch, 3.9)" diff --git a/.github/workflows/ci-pkg-install.yml b/.github/workflows/ci-pkg-install.yml index d22a8d3ace1e2..f096a709595af 100644 --- a/.github/workflows/ci-pkg-install.yml +++ b/.github/workflows/ci-pkg-install.yml @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-22.04", "macOS-12", "windows-2022"] + os: ["ubuntu-22.04", "macOS-13", "windows-2022"] pkg-name: ["fabric", "pytorch", "lightning", "notset"] python-version: ["3.9", "3.11"] steps: diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml index 06616650deb9c..7d854bbf7e618 100644 --- a/.github/workflows/ci-tests-fabric.yml +++ b/.github/workflows/ci-tests-fabric.yml @@ -53,7 +53,7 @@ jobs: - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } # only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues - - { os: "macOS-12", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } + - { os: "macOS-13", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } # "oldest" versions tests, only on minimum Python diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml index 4de22a24f36e6..a9d7dfdf55578 100644 --- a/.github/workflows/ci-tests-pytorch.yml +++ b/.github/workflows/ci-tests-pytorch.yml @@ -57,7 +57,7 @@ jobs: - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } # only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues - - { os: "macOS-12", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } + - { os: "macOS-13", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } # "oldest" versions tests, only on minimum Python From dabbf83e991f6a298c0c1661b166e87dbdfb665e Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:59:08 +0100 Subject: [PATCH 12/12] bump: Torch `2.5` (#20351) * bump: Torch `2.5.0` * push docker * docker * 2.5.1 and mypy * update USE_DISTRIBUTED=0 test * also for pytorch lightning no distributed * set USE_LIBUV=0 on windows * try drop pickle warning * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disable compiling update_metrics * bump 2.2.x to bugfix * disable also log in logger connector (also calls metric) * more point release bumps * remove unloved type ignore and print some more on exit * update checkgroup * minor versions * shortened version in build-pl * pytorch 2.4 is with python 3.11 * 2.1 and 2.3 without patch release * for 2.4.1: docker with 3.11 test with 3.12 --------- Co-authored-by: Thomas Viehmann Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 61a403a512466d65ebe730b1cc0cf4a909a533f2) --- .azure/gpu-benchmarks.yml | 2 +- .azure/gpu-tests-fabric.yml | 2 +- .azure/gpu-tests-pytorch.yml | 2 +- .github/checkgroup.yml | 44 +++++++++++-------- .github/workflows/ci-tests-fabric.yml | 15 ++++--- .github/workflows/ci-tests-pytorch.yml | 15 ++++--- .github/workflows/docker-build.yml | 22 +++++++--- requirements/fabric/base.txt | 2 +- requirements/fabric/examples.txt | 4 +- requirements/fabric/test.txt | 2 +- requirements/pytorch/base.txt | 4 +- requirements/pytorch/examples.txt | 4 +- requirements/typing.txt | 2 +- src/lightning/fabric/__init__.py | 5 +++ src/lightning/pytorch/core/module.py | 6 ++- .../connectors/logger_connector/result.py | 2 + tests/run_standalone_tests.sh | 1 + tests/tests_fabric/utilities/test_imports.py | 12 +++++ .../callbacks/test_early_stopping.py | 8 +--- .../checkpointing/test_model_checkpoint.py | 8 +--- .../core/test_metric_result_integration.py | 4 +- tests/tests_pytorch/helpers/test_datasets.py | 8 +--- tests/tests_pytorch/loggers/test_all.py | 9 +--- tests/tests_pytorch/loggers/test_logger.py | 5 +-- tests/tests_pytorch/loggers/test_wandb.py | 5 +-- tests/tests_pytorch/utilities/test_imports.py | 12 +++++ 26 files changed, 117 insertions(+), 88 deletions(-) diff --git a/.azure/gpu-benchmarks.yml b/.azure/gpu-benchmarks.yml index 111589945e048..24b78542a798a 100644 --- a/.azure/gpu-benchmarks.yml +++ b/.azure/gpu-benchmarks.yml @@ -46,7 +46,7 @@ jobs: variables: DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' ) container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" options: "--gpus=all --shm-size=32g" strategy: matrix: diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index e63641b8ecc7d..ee7fe2e281478 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -60,7 +60,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" workspace: clean: all diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 4605e824426e9..1ece70f75e193 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -53,7 +53,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index fa455da015fce..b9fcde984bc74 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -21,19 +21,22 @@ subprojects: checks: - "pl-cpu (macOS-13, lightning, 3.9, 2.1, oldest)" - "pl-cpu (macOS-14, lightning, 3.10, 2.1)" - - "pl-cpu (macOS-14, lightning, 3.11, 2.2)" + - "pl-cpu (macOS-14, lightning, 3.11, 2.2.2)" - "pl-cpu (macOS-14, lightning, 3.11, 2.3)" - - "pl-cpu (macOS-14, lightning, 3.12, 2.4)" + - "pl-cpu (macOS-14, lightning, 3.12, 2.4.1)" + - "pl-cpu (macOS-14, lightning, 3.12, 2.5.1)" - "pl-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" + - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)" - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "pl-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)" - "pl-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "pl-cpu (windows-2022, lightning, 3.10, 2.1)" - - "pl-cpu (windows-2022, lightning, 3.11, 2.2)" + - "pl-cpu (windows-2022, lightning, 3.11, 2.2.2)" - "pl-cpu (windows-2022, lightning, 3.11, 2.3)" - - "pl-cpu (windows-2022, lightning, 3.12, 2.4)" + - "pl-cpu (windows-2022, lightning, 3.12, 2.4.1)" + - "pl-cpu (windows-2022, lightning, 3.12, 2.5.1)" - "pl-cpu (macOS-14, pytorch, 3.9, 2.1)" - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.9, 2.1)" @@ -141,15 +144,17 @@ subprojects: - "!*.md" - "!**/*.md" checks: - - "build-cuda (3.11, 2.1, 12.1.0)" - - "build-cuda (3.11, 2.2, 12.1.0)" - - "build-cuda (3.11, 2.3, 12.1.0)" - - "build-cuda (3.12, 2.4, 12.1.0)" + - "build-cuda (3.10, 2.1.2, 12.1.0)" + - "build-cuda (3.11, 2.2.2, 12.1.0)" + - "build-cuda (3.11, 2.3.1, 12.1.0)" + - "build-cuda (3.11, 2.4.1, 12.1.0)" + - "build-cuda (3.12, 2.5.1, 12.1.0)" #- "build-NGC" - - "build-pl (3.11, 2.1, 12.1.0)" + - "build-pl (3.10, 2.1, 12.1.0)" - "build-pl (3.11, 2.2, 12.1.0)" - "build-pl (3.11, 2.3, 12.1.0)" - - "build-pl (3.12, 2.4, 12.1.0)" + - "build-pl (3.11, 2.4, 12.1.0)" + - "build-pl (3.12, 2.5, 12.1.0)" # SECTION: lightning_fabric @@ -168,19 +173,22 @@ subprojects: checks: - "fabric-cpu (macOS-13, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (macOS-14, lightning, 3.10, 2.1)" - - "fabric-cpu (macOS-14, lightning, 3.11, 2.2)" + - "fabric-cpu (macOS-14, lightning, 3.11, 2.2.2)" - "fabric-cpu (macOS-14, lightning, 3.11, 2.3)" - - "fabric-cpu (macOS-14, lightning, 3.12, 2.4)" + - "fabric-cpu (macOS-14, lightning, 3.12, 2.4.1)" + - "fabric-cpu (macOS-14, lightning, 3.12, 2.5.1)" - "fabric-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" + - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)" - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "fabric-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)" - "fabric-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (windows-2022, lightning, 3.10, 2.1)" - - "fabric-cpu (windows-2022, lightning, 3.11, 2.2)" + - "fabric-cpu (windows-2022, lightning, 3.11, 2.2.2)" - "fabric-cpu (windows-2022, lightning, 3.11, 2.3)" - - "fabric-cpu (windows-2022, lightning, 3.12, 2.4)" + - "fabric-cpu (windows-2022, lightning, 3.12, 2.4.1)" + - "fabric-cpu (windows-2022, lightning, 3.12, 2.5.1)" - "fabric-cpu (macOS-14, fabric, 3.9, 2.1)" - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.9, 2.1)" diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml index 7d854bbf7e618..ca4dd0b845750 100644 --- a/.github/workflows/ci-tests-fabric.yml +++ b/.github/workflows/ci-tests-fabric.yml @@ -43,15 +43,18 @@ jobs: - { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } # only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues - { os: "macOS-13", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml index a9d7dfdf55578..0c7deddbe5923 100644 --- a/.github/workflows/ci-tests-pytorch.yml +++ b/.github/workflows/ci-tests-pytorch.yml @@ -47,15 +47,18 @@ jobs: - { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } # only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues - { os: "macOS-13", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 6df2b8cbb73d3..09ae3adc45ac6 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -43,10 +43,11 @@ jobs: include: # We only release one docker image per PyTorch version. # Make sure the matrix here matches the one below. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -103,10 +104,11 @@ jobs: include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 @@ -115,6 +117,12 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} + + - name: shorten Torch version + run: | + # convert 1.10.2 to 1.10 + pt_version=$(echo ${{ matrix.pytorch_version }} | cut -d. -f1,2) + echo "PT_VERSION=$pt_version" >> $GITHUB_ENV - uses: docker/build-push-action@v6 with: build-args: | @@ -123,7 +131,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_NIGHTLY }} - tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}" + tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}-cuda${{ matrix.cuda_version }}" timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v2 if: failure() && env.PUSH_NIGHTLY == 'true' diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 0a99614a46870..42c055e85ca7d 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 fsspec[http] >=2022.5.0, <2024.4.0 packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt index cb4135da2409a..3352db77d8bd9 100644 --- a/requirements/fabric/examples.txt +++ b/requirements/fabric/examples.txt @@ -1,6 +1,6 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torchvision >=0.16.0, <0.20.0 -torchmetrics >=0.10.0, <1.3.0 +torchvision >=0.16.0, <0.21.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 8fb9122051eec..2da6ae8854d64 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -7,4 +7,4 @@ pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 click ==8.1.7 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 6ff628d7edfb5..94aca759c37e2 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -1,11 +1,11 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 tqdm >=4.57.0, <4.67.0 PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2024.4.0 -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 lightning-utilities >=0.10.0, <0.12.0 diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index 9a6ae7e47dfb8..2e793e0045da9 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment requests <2.32.0 -torchvision >=0.16.0, <0.20.0 +torchvision >=0.16.0, <0.21.0 ipython[all] <8.15.0 -torchmetrics >=0.10.0, <1.3.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/typing.txt b/requirements/typing.txt index 0323edfd6098a..71414998dd7f3 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.11.0 -torch==2.4.1 +torch==2.5.1 types-Markdown types-PyYAML diff --git a/src/lightning/fabric/__init__.py b/src/lightning/fabric/__init__.py index 921d3d61e60fe..d675b21e5d1d2 100644 --- a/src/lightning/fabric/__init__.py +++ b/src/lightning/fabric/__init__.py @@ -2,6 +2,7 @@ import logging import os +import sys from lightning_utilities.core.imports import package_available @@ -26,6 +27,10 @@ # https://github.com/pytorch/pytorch/issues/83973 os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1" +# see https://github.com/pytorch/pytorch/issues/139990 +if sys.platform == "win32": + os.environ["USE_LIBUV"] = "0" + from lightning.fabric.fabric import Fabric # noqa: E402 from lightning.fabric.utilities.seed import seed_everything # noqa: E402 diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 782fc40d928ef..d8374ef7ea5e8 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -531,7 +531,7 @@ def log( logger=logger, on_step=on_step, on_epoch=on_epoch, - reduce_fx=reduce_fx, # type: ignore[arg-type] + reduce_fx=reduce_fx, enable_graph=enable_graph, add_dataloader_idx=add_dataloader_idx, batch_size=batch_size, @@ -1405,7 +1405,9 @@ def forward(self, x): input_sample = self._apply_batch_transfer_handler(input_sample) file_path = str(file_path) if isinstance(file_path, Path) else file_path - torch.onnx.export(self, input_sample, file_path, **kwargs) + # PyTorch (2.5) declares file_path to be str | PathLike[Any] | None, but + # BytesIO does work, too. + torch.onnx.export(self, input_sample, file_path, **kwargs) # type: ignore self.train(mode) @torch.no_grad() diff --git a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py index 583105c3660e0..62cc7844d3897 100644 --- a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py +++ b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py @@ -351,6 +351,7 @@ def _extract_batch_size(self, value: _ResultMetric, batch_size: Optional[int], m return batch_size + @torch.compiler.disable def log( self, fx: str, @@ -413,6 +414,7 @@ def log( batch_size = self._extract_batch_size(self[key], batch_size, meta) self.update_metrics(key, value, batch_size) + @torch.compiler.disable def update_metrics(self, key: str, value: _VALUE, batch_size: int) -> None: result_metric = self[key] # performance: avoid calling `__call__` to avoid the checks in `torch.nn.Module._call_impl` diff --git a/tests/run_standalone_tests.sh b/tests/run_standalone_tests.sh index 0aa0bacff168a..8a4d8e180d112 100755 --- a/tests/run_standalone_tests.sh +++ b/tests/run_standalone_tests.sh @@ -48,6 +48,7 @@ function show_batched_output { # heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail if perl -nle 'print if /error|(?