diff --git a/.circleci/config.yml b/.circleci/config.yml index d13c73e0a9d50..22d6981b5bd44 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,6 @@ references: name: Install Dependences command: | pip install "$TORCH_VERSION" --user - # this is temporal fix til test-tube is not merged and released pip install -r requirements.txt --user sudo pip install pytest pytest-cov pytest-flake8 pip install -r ./tests/requirements.txt --user @@ -21,7 +20,16 @@ references: name: Testing command: | python --version ; pip --version ; pip list - py.test pytorch_lightning tests pl_examples -v --doctest-modules --junitxml=test-reports/pytest_junit.xml + py.test pytorch_lightning tests -v --doctest-modules --junitxml=test-reports/pytest_junit.xml + no_output_timeout: 15m + + examples: &examples + run: + name: PL Examples + command: | + pip install -r ./pl_examples/requirements.txt --user + python --version ; pip --version ; pip list + py.test pl_examples -v --doctest-modules --junitxml=test-reports/pytest_junit.xml no_output_timeout: 15m install_pkg: &install_pkg @@ -84,10 +92,8 @@ jobs: - TORCH_VERSION: "torch" steps: &steps - checkout - - *install_deps - *tests - - store_test_results: path: test-reports - store_artifacts: @@ -121,6 +127,16 @@ jobs: - TORCH_VERSION: "torch>=1.4, <1.5" steps: *steps + Examples: + docker: + - image: circleci/python:3.7 + environment: + - TORCH_VERSION: "torch" + steps: + - checkout + - *install_deps + - *examples + Install-pkg: docker: - image: circleci/python:3.7 @@ -141,3 +157,4 @@ workflows: - PyTorch-v1.3 - PyTorch-v1.4 - Install-pkg + - Examples diff --git a/.drone.yml b/.drone.yml index 43cba1e780c96..4235983f01d34 100644 --- a/.drone.yml +++ b/.drone.yml @@ -6,7 +6,7 @@ name: torch-GPU steps: - name: testing - image: nvcr.io/nvidia/pytorch:20.02-py3 + image: pytorch/pytorch:1.4-cuda10.1-cudnn7-runtime environment: SLURM_LOCALID: 0 CODECOV_TOKEN: @@ -16,12 +16,13 @@ steps: - pip install pip -U - pip --version - nvidia-smi - #- pip install torch==1.3 + - bash ./tests/install_AMP.sh - pip install -r requirements.txt --user - pip install coverage pytest pytest-cov pytest-flake8 codecov - pip install -r ./tests/requirements.txt --user - pip list - python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')" - - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules # --flake8 + - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules # --flake8 - coverage report - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG + - python tests/collect_env_details.py diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 5312cf397e560..8d0ab36957dcc 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -40,12 +40,12 @@ Minimal means having the shortest code but still preserving the bug. --> ### Environment Please copy and paste the output from our -[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py) +[environment collection script](https://raw.githubusercontent.com/PyTorchLightning/pytorch-lightning/master/tests/collect_env_details.py) (or fill out the checklist below manually). 
You can get the script and run it with: ``` -wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py +wget https://raw.githubusercontent.com/PyTorchLightning/pytorch-lightning/master/tests/collect_env_details.py # For security purposes, please check the contents of collect_env_details.py before running it. python collect_env_details.py ``` diff --git a/.github/stale.yml b/.github/stale.yml index 618f19bbe80f8..365edce362c82 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,9 +1,9 @@ # https://github.com/marketplace/stale # Number of days of inactivity before an issue becomes stale -daysUntilStale: 90 +daysUntilStale: 60 # Number of days of inactivity before a stale issue is closed -daysUntilClose: 14 +daysUntilClose: 9 # Issues with these labels will never be considered stale exemptLabels: - pinned diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 6371e9e4b58eb..2e4c00cba5b21 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -23,7 +23,7 @@ jobs: python-version: [3.6, 3.7] requires: ['minimal', 'latest'] - # https://stackoverflow.com/a/59076067/4521646 + # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 20 steps: - uses: actions/checkout@v2 @@ -32,6 +32,12 @@ jobs: with: python-version: ${{ matrix.python-version }} + # Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646 + - name: Setup macOS + if: runner.os == 'macOS' + run: | + brew install libomp # https://github.com/pytorch/pytorch/issues/20030 + - name: Set min. dependencies if: matrix.requires == 'minimal' run: | @@ -71,7 +77,7 @@ jobs: run: | # tox --sitepackages # flake8 . - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml + coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml coverage report - name: Upload pytest test results diff --git a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index bc310daa64734..0000000000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,2 +0,0 @@ -MD013: false # headers with the same names -MD024: false # line length diff --git a/.run_local_tests.sh b/.run_local_tests.sh index ce2a92081986c..20fe84ff22fcf 100644 --- a/.run_local_tests.sh +++ b/.run_local_tests.sh @@ -12,5 +12,5 @@ rm -rf ./tests/cometruns* rm -rf ./tests/wandb* rm -rf ./tests/tests/* rm -rf ./lightning_logs -coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules --flake8 -coverage report -m +python -m coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules --flake8 +python -m coverage report -m diff --git a/CHANGELOG.md b/CHANGELOG.md index c948e22e7b553..d4a848bd4d822 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Added +- Added Reinforcement Learning - Deep Q-network (DQN) lightning example ([#1232](https://github.com/PyTorchLightning/pytorch-lightning/pull/1232)) - Added support for hierarchical `dict` ([#1152](https://github.com/PyTorchLightning/pytorch-lightning/pull/1152)) - Added `TrainsLogger` class ([#1122](https://github.com/PyTorchLightning/pytorch-lightning/pull/1122)) - Added type hints to `pytorch_lightning.core` ([#946](https://github.com/PyTorchLightning/pytorch-lightning/pull/946)) - Added support for `IterableDataset` in validation and testing ([#1104](https://github.com/PyTorchLightning/pytorch-lightning/pull/1104)) - Added support for non-primitive types in `hparams` for `TensorboardLogger` ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130)) - Added a check that stops the training when loss or weights contain `NaN` or `inf` values. ([#1097](https://github.com/PyTorchLightning/pytorch-lightning/pull/1097)) +- Updated references to self.forward() to instead use the `__call__` interface. ([#1211](https://github.com/PyTorchLightning/pytorch-lightning/pull/1211)) ### Changed @@ -29,11 +31,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed - - `Trainer.add_argparse_args` classmethod fixed. Now it adds a type for the arguments ([#1147](https://github.com/PyTorchLightning/pytorch-lightning/pull/1147)). - Fixed bug related to type cheking of `ReduceLROnPlateau` lr schedulers([#1114](https://github.com/PyTorchLightning/pytorch-lightning/issues/1114)) - Fixed a bug to ensure lightning checkpoints to be backward compatible ([#1132](https://github.com/PyTorchLightning/pytorch-lightning/pull/1132)) - Fixed all warnings and errors in the docs build process ([#1191](https://github.com/PyTorchLightning/pytorch-lightning/pull/1191)) +- Fixed an issue where `val_percent_check=0` would not disable validation ([#1251](https://github.com/PyTorchLightning/pytorch-lightning/pull/1251)) ## [0.7.1] - 2020-03-07 diff --git a/README.md b/README.md index 314ca70d632c7..b95a4319333f4 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/PytorchLightning/pytorch-lightning/blob/master/LICENSE) [![Next Release](https://img.shields.io/badge/Next%20Release-May%2006-.svg)](https://shields.io/) - @@ -39,21 +39,21 @@ removed until codecov badge isn't empy. likely a config error showing nothing on Simple installation from PyPI ```bash -pip install pytorch-lightning +pip install pytorch-lightning ``` -## Docs -- [master](https://pytorch-lightning.readthedocs.io/en/latest) +## Docs +- [master](https://pytorch-lightning.readthedocs.io/en/latest) - [0.7.1](https://pytorch-lightning.readthedocs.io/en/0.7.1/) - [0.6.0](https://pytorch-lightning.readthedocs.io/en/0.6.0/) - [0.5.3.2](https://pytorch-lightning.readthedocs.io/en/0.5.3.2/) -## Demo -[MNIST, GAN, BERT on COLAB!](https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=HOk9c4_35FKg) +## Demo +[MNIST, GAN, BERT on COLAB!](https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=HOk9c4_35FKg) [MNIST on TPUs](https://colab.research.google.com/drive/1-_LKx4HwAxl5M6xPJmqAAu444LTDQoa3) ## What is it? -Lightning is a way to organize your PyTorch code to decouple the science code from the engineering. It's more of a style-guide than a framework. +Lightning is a way to organize your PyTorch code to decouple the science code from the engineering. 
It's more of a style-guide than a framework. To use Lightning, first refactor your research code into a [LightningModule](https://pytorch-lightning.readthedocs.io/en/latest/lightning-module.html). @@ -62,10 +62,10 @@ To use Lightning, first refactor your research code into a [LightningModule](htt And Lightning automates the rest using the [Trainer](https://pytorch-lightning.readthedocs.io/en/latest/trainer.html)! ![PT to PL](docs/source/_images/lightning_module/pt_trainer.png) -Lightning guarantees riguously tested, correct, modern best practices for the automated parts. +Lightning guarantees rigorously tested, correct, modern best practices for the automated parts. -## How flexible is it? -As you see, you're just organizing your PyTorch code - there's no abstraction. +## How flexible is it? +As you see, you're just organizing your PyTorch code - there's no abstraction. And for the stuff that the Trainer abstracts out you can [override any part](https://pytorch-lightning.readthedocs.io/en/latest/introduction_guide.html#extensibility) you want to do things like implement your own distributed training, 16-bit precision, or even a custom backwards pass. @@ -78,9 +78,9 @@ For anything else you might need, we have an extensive [callback system](https:/ If you're just getting into deep learning, we recommend you learn PyTorch first! Once you've implemented a few models, come back and use all the advanced features of Lightning :) -## What does lightning control for me? +## What does lightning control for me? -Everything in Blue! +Everything in Blue! This is how lightning separates the science (red) from the engineering (blue). ![Overview](docs/source/_images/general/pl_overview.gif) @@ -92,33 +92,33 @@ If your code IS a mess, then you needed to clean up anyhow ;) [Check out this step-by-step guide](https://towardsdatascience.com/from-pytorch-to-pytorch-lightning-a-gentle-introduction-b371b7caaf09). -## Starting a new project? -[Use our seed-project aimed at reproducibility!](https://github.com/PytorchLightning/pytorch-lightning-conference-seed) +## Starting a new project? +[Use our seed-project aimed at reproducibility!](https://github.com/PytorchLightning/pytorch-lightning-conference-seed) ## Why do I want to use lightning? Although your research/production project might start simple, once you add things like GPU AND TPU training, 16-bit precision, etc, you end up spending more time engineering than researching. Lightning automates AND rigorously tests those parts for you. ## Support -- [7 core contributors](https://pytorch-lightning.readthedocs.io/en/latest/governance.html) who are all a mix of professional engineers, Research Scientists, PhD students from top AI labs. +- [8 core contributors](https://pytorch-lightning.readthedocs.io/en/latest/governance.html) who are all a mix of professional engineers, Research Scientists, PhD students from top AI labs. - 100+ community contributors. Lightning is also part of the [PyTorch ecosystem](https://pytorch.org/ecosystem/) which requires projects to have solid testing, documentation and support. 
--- - -## README Table of Contents -- [How do I use it](https://github.com/PytorchLightning/pytorch-lightning#how-do-i-do-use-it) -- [What lightning automates](https://github.com/PytorchLightning/pytorch-lightning#what-does-lightning-control-for-me) -- [Tensorboard integration](https://github.com/PytorchLightning/pytorch-lightning#tensorboard) -- [Lightning features](https://github.com/PytorchLightning/pytorch-lightning#lightning-automates-all-of-the-following-each-is-also-configurable) -- [Examples](https://github.com/PytorchLightning/pytorch-lightning#examples) + +## README Table of Contents +- [How do I use it](https://github.com/PytorchLightning/pytorch-lightning#how-do-i-do-use-it) +- [What lightning automates](https://github.com/PytorchLightning/pytorch-lightning#what-does-lightning-control-for-me) +- [Tensorboard integration](https://github.com/PytorchLightning/pytorch-lightning#tensorboard) +- [Lightning features](https://github.com/PytorchLightning/pytorch-lightning#lightning-automates-all-of-the-following-each-is-also-configurable) +- [Examples](https://github.com/PytorchLightning/pytorch-lightning#examples) - [Tutorials](https://github.com/PytorchLightning/pytorch-lightning#tutorials) - [Asking for help](https://github.com/PytorchLightning/pytorch-lightning#asking-for-help) - [Contributing](https://github.com/PytorchLightning/pytorch-lightning/blob/master/.github/CONTRIBUTING.md) -- [Bleeding edge install](https://github.com/PytorchLightning/pytorch-lightning#bleeding-edge) -- [Lightning Design Principles](https://github.com/PytorchLightning/pytorch-lightning#lightning-design-principles) +- [Bleeding edge install](https://github.com/PytorchLightning/pytorch-lightning#bleeding-edge) +- [Lightning Design Principles](https://github.com/PytorchLightning/pytorch-lightning#lightning-design-principles) - [Lightning team](https://github.com/PytorchLightning/pytorch-lightning#lightning-team) -- [FAQ](https://github.com/PytorchLightning/pytorch-lightning#faq) +- [FAQ](https://github.com/PytorchLightning/pytorch-lightning#faq) --- @@ -127,23 +127,23 @@ Here's how you would organize a realistic PyTorch project into Lightning. ![PT to PL](docs/source/_images/mnist_imgs/pt_to_pl.jpg) -The LightningModule defines a *system* such as seq-2-seq, GAN, etc... -It can ALSO define a simple classifier. +The LightningModule defines a *system* such as seq-2-seq, GAN, etc... +It can ALSO define a simple classifier. In summary, you: 1. Define a [LightningModule](https://pytorch-lightning.rtfd.io/en/latest/lightning-module.html) ```python class LitSystem(pl.LightningModule): - + def __init__(self): - super(LitSystem, self).__init__() + super().__init__() # not the best model... self.l1 = torch.nn.Linear(28 * 28, 10) - + def forward(self, x): return torch.relu(self.l1(x.view(x.size(0), -1))) - + def training_step(self, batch, batch_idx): ... ``` @@ -155,12 +155,12 @@ In summary, you: model = LitSystem() # most basic trainer, uses good defaults - trainer = Trainer() - trainer.fit(model) + trainer = Trainer() + trainer.fit(model) ``` - + [Check out the COLAB demo here](https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=HOk9c4_35FKg) - + ## What types of research works? Anything! Remember, that this is just organized PyTorch code. The Training step defines the core complexity found in the training loop. @@ -171,10 +171,10 @@ The Training step defines the core complexity found in the training loop. 
# define what happens for training here def training_step(self, batch, batch_idx): x, y = batch - + # define your own forward and loss calculation hidden_states = self.encoder(x) - + # even as complex as a seq-2-seq + attn model # (this is just a toy, non-working example to illustrate) start_token = '' @@ -182,37 +182,37 @@ def training_step(self, batch, batch_idx): loss = 0 for step in range(max_seq_len): attn_context = self.attention_nn(hidden_states, start_token) - pred = self.decoder(start_token, attn_context, last_hidden) + pred = self.decoder(start_token, attn_context, last_hidden) last_hidden = pred pred = self.predict_nn(pred) loss += self.loss(last_hidden, y[step]) - + #toy example as well loss = loss / max_seq_len - return {'loss': loss} + return {'loss': loss} ``` -#### Or as basic as CNN image classification +#### Or as basic as CNN image classification ```python # define what happens for validation here -def validation_step(self, batch, batch_idx): +def validation_step(self, batch, batch_idx): x, y = batch - + # or as basic as a CNN classification - out = self.forward(x) + out = self(x) loss = my_loss(out, y) - return {'loss': loss} + return {'loss': loss} ``` And without changing a single line of code, you could run on CPUs -```python +```python trainer = Trainer(max_epochs=1) ``` Or GPUs -```python +```python # 8 GPUs trainer = Trainer(max_epochs=1, gpus=8) @@ -221,7 +221,7 @@ trainer = Trainer(max_epochs=1, gpus=8, num_nodes=32) ``` Or TPUs -```python +```python trainer = Trainer(num_tpu_cores=8) ``` @@ -233,10 +233,12 @@ trainer.test() ## Visualization Lightning has out-of-the-box integration with the popular logging/visualizing frameworks -- Tensorboard -- MLFlow -- Neptune.ai -- Comet.ml +- [Tensorboard](https://pytorch.org/docs/stable/tensorboard.html) +- [MLFlow](https://mlflow.org/) +- [Neptune.ai](https://neptune.ai/) +- [Comet.ml](https://www.comet.ml/site/) +- [Wandb](https://www.wandb.com/) +- [Trains](https://github.com/allegroai/trains) - ... ![tensorboard-support](docs/source/_images/general/tf_loss.png) @@ -251,10 +253,10 @@ Lightning has out-of-the-box integration with the popular logging/visualizing fr - Checkpointing - Experiment management - [Full list here](https://pytorch-lightning.readthedocs.io/en/latest/#common-use-cases) - -## Examples -Check out this awesome list of research papers and implementations done with Lightning. + +## Examples +Check out this awesome list of research papers and implementations done with Lightning. - [Contextual Emotion Detection (DoubleDistilBert)](https://github.com/PyTorchLightning/emotion_transformer) - [Generative Adversarial Network](https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=TyYOdg8g77P0) @@ -270,58 +272,58 @@ Check out this awesome list of research papers and implementations done with Lig - [Transformers text classification](https://github.com/ricardorei/lightning-text-classification) - [VAE Library of over 18+ VAE flavors](https://github.com/AntixK/PyTorch-VAE) -## Tutorials +## Tutorials Check out our [introduction guide](https://pytorch-lightning.readthedocs.io/en/latest/introduction_guide.html) to get started. Or jump straight into [our tutorials](https://pytorch-lightning.readthedocs.io/en/latest/#tutorials). --- -## Asking for help -Welcome to the Lightning community! +## Asking for help +Welcome to the Lightning community! -If you have any questions, feel free to: -1. [read the docs](https://pytorch-lightning.rtfd.io/en/latest/). -2. 
[Search through the issues](https://github.com/PytorchLightning/pytorch-lightning/issues?utf8=%E2%9C%93&q=my++question). -3. [Ask on stackoverflow](https://stackoverflow.com/questions/ask?guided=false) with the tag pytorch-lightning. +If you have any questions, feel free to: +1. [read the docs](https://pytorch-lightning.rtfd.io/en/latest/). +2. [Search through the issues](https://github.com/PytorchLightning/pytorch-lightning/issues?utf8=%E2%9C%93&q=my++question). +3. [Ask on stackoverflow](https://stackoverflow.com/questions/ask?guided=false) with the tag pytorch-lightning. 4. [Join our slack](https://join.slack.com/t/pytorch-lightning/shared_invite/enQtODU5ODIyNTUzODQwLTFkMDg5Mzc1MDBmNjEzMDgxOTVmYTdhYjA1MDdmODUyOTg2OGQ1ZWZkYTQzODhhNzdhZDA3YmNhMDhlMDY4YzQ). ---- -## FAQ -**How do I use Lightning for rapid research?** -[Here's a walk-through](https://pytorch-lightning.readthedocs.io/en/latest/introduction_guide.html) +--- +## FAQ +**How do I use Lightning for rapid research?** +[Here's a walk-through](https://pytorch-lightning.readthedocs.io/en/latest/introduction_guide.html) -**Why was Lightning created?** +**Why was Lightning created?** Lightning has 3 goals in mind: -1. Maximal flexibility while abstracting out the common boilerplate across research projects. -2. Reproducibility. If all projects use the LightningModule template, it will be much much easier to understand what's going on and where to look! It will also mean every implementation follows a standard format. -3. Democratizing PyTorch power user features. Distributed training? 16-bit? know you need them but don't want to take the time to implement? All good... these come built into Lightning. +1. Maximal flexibility while abstracting out the common boilerplate across research projects. +2. Reproducibility. If all projects use the LightningModule template, it will be much much easier to understand what's going on and where to look! It will also mean every implementation follows a standard format. +3. Democratizing PyTorch power user features. Distributed training? 16-bit? know you need them but don't want to take the time to implement? All good... these come built into Lightning. + +**How does Lightning compare with Ignite and fast.ai?** +[Here's a thorough comparison](https://medium.com/@_willfalcon/pytorch-lightning-vs-pytorch-ignite-vs-fast-ai-61dc7480ad8a). -**How does Lightning compare with Ignite and fast.ai?** -[Here's a thorough comparison](https://medium.com/@_willfalcon/pytorch-lightning-vs-pytorch-ignite-vs-fast-ai-61dc7480ad8a). +**Is this another library I have to learn?** +Nope! We use pure Pytorch everywhere and don't add unecessary abstractions! -**Is this another library I have to learn?** -Nope! We use pure Pytorch everywhere and don't add unecessary abstractions! +**Are there plans to support Python 2?** +Nope. -**Are there plans to support Python 2?** -Nope. +**Are there plans to support virtualenv?** +Nope. Please use anaconda or miniconda. -**Are there plans to support virtualenv?** -Nope. Please use anaconda or miniconda. 
+**Which PyTorch versions do you support?** +- **PyTorch 1.1.0** + ```bash + # install pytorch 1.1.0 using the official instructions + + # install test-tube 0.6.7.6 which supports 1.1.0 + pip install test-tube==0.6.7.6 -**Which PyTorch versions do you support?** -- **PyTorch 1.1.0** - ```bash - # install pytorch 1.1.0 using the official instructions - - # install test-tube 0.6.7.6 which supports 1.1.0 - pip install test-tube==0.6.7.6 - - # install latest Lightning version without upgrading deps + # install latest Lightning version without upgrading deps pip install -U --no-deps pytorch-lightning - ``` + ``` - **PyTorch 1.2.0, 1.3.0,** - Install via pip as normal + Install via pip as normal ## Custom installation @@ -348,15 +350,15 @@ pip install https://github.com/PytorchLightning/pytorch-lightning/archive/0.X.Y. #### Leads - William Falcon [(williamFalcon)](https://github.com/williamFalcon) (Lightning founder) -- Jirka Borovec [(Borda)](https://github.com/Borda) (-_-) +- Jirka Borovec [(Borda)](https://github.com/Borda) (ghost :) - Ethan Harris [(ethanwharris)](https://github.com/ethanwharris) (Torchbearer founder) - Matthew Painter [(MattPainter01)](https://github.com/MattPainter01) (Torchbearer founder) #### Core Maintainers - Nick Eggert [(neggert)](https://github.com/neggert) -- Jeremy Jordan [(jeremyjordan)](https://github.com/jeremyjordan) - Jeff Ling [(jeffling)](https://github.com/jeffling) +- Jeremy Jordan [(jeremyjordan)](https://github.com/jeremyjordan) - Tullie Murrell [(tullie)](https://github.com/tullie) ## Bibtex diff --git a/docs/source/_images/lightning_module/pt_to_pl.png b/docs/source/_images/lightning_module/pt_to_pl.png index 8b35beed073e7..c5b24093f8311 100644 Binary files a/docs/source/_images/lightning_module/pt_to_pl.png and b/docs/source/_images/lightning_module/pt_to_pl.png differ diff --git a/docs/source/child_modules.rst b/docs/source/child_modules.rst index 6ea0c59951f9a..49fe6f463c373 100644 --- a/docs/source/child_modules.rst +++ b/docs/source/child_modules.rst @@ -24,7 +24,7 @@ that change in the `Autoencoder` model are the init, forward, training, validati x, _ = batch representation = self.encoder(x) - x_hat = self.forward(representation) + x_hat = self(representation) loss = MSE(x, x_hat) return loss @@ -38,7 +38,7 @@ that change in the `Autoencoder` model are the init, forward, training, validati def _shared_eval(self, batch, batch_idx, prefix): x, y = batch representation = self.encoder(x) - x_hat = self.forward(representation) + x_hat = self(representation) loss = F.nll_loss(logits, y) return {f'{prefix}_loss': loss} diff --git a/docs/source/fast_training.rst b/docs/source/fast_training.rst index 4500ebde88dc6..b741107ca17b4 100644 --- a/docs/source/fast_training.rst +++ b/docs/source/fast_training.rst @@ -1,10 +1,10 @@ Fast Training -================ +============= There are multiple options to speed up different parts of the training by choosing to train on a subset of data. This could be done for speed or debugging purposes. Check validation every n epochs -------------------------------------- +------------------------------- If you have a small dataset you might want to check validation every n epochs .. 
code-block:: python @@ -13,7 +13,7 @@ If you have a small dataset you might want to check validation every n epochs trainer = Trainer(check_val_every_n_epoch=1) Force training for min or max epochs -------------------------------------- +------------------------------------ It can be useful to force training for a minimum number of epochs or limit to a max number. .. seealso:: @@ -26,7 +26,7 @@ It can be useful to force training for a minimum number of epochs or limit to a Set validation check frequency within 1 training epoch -------------------------------------------------------- +------------------------------------------------------ For large datasets it's often desirable to check validation multiple times within a training loop. Pass in a float to check that often within 1 training epoch. Pass in an int k to check every k training batches. Must use an int if using an IterableDataset. @@ -43,7 +43,7 @@ Must use an int if using an IterableDataset. trainer = Trainer(val_check_interval=100) Use training data subset ----------------------------------- +------------------------ If you don't want to check 100% of the training set (for debugging or if it's huge), set this flag. .. code-block:: python @@ -54,12 +54,11 @@ If you don't want to check 100% of the training set (for debugging or if it's hu # check 10% only trainer = Trainer(train_percent_check=0.1) -.. note:: train_percent_check will be overwritten by overfit_pct if overfit_pct > 0 +.. note:: ``train_percent_check`` will be overwritten by ``overfit_pct`` if ``overfit_pct`` > 0. Use test data subset -------------------------------------- -If you don't want to check 100% of the test set (for debugging or if it's huge), set this flag -test_percent_check will be overwritten by overfit_pct if overfit_pct > 0. +-------------------- +If you don't want to check 100% of the test set (for debugging or if it's huge), set this flag. .. code-block:: python @@ -69,10 +68,11 @@ test_percent_check will be overwritten by overfit_pct if overfit_pct > 0. # check 10% only trainer = Trainer(test_percent_check=0.1) +.. note:: ``test_percent_check`` will be overwritten by ``overfit_pct`` if ``overfit_pct`` > 0. + Use validation data subset --------------------------------------------- -If you don't want to check 100% of the validation set (for debugging or if it's huge), set this flag -val_percent_check will be overwritten by overfit_pct if overfit_pct > 0 +-------------------------- +If you don't want to check 100% of the validation set (for debugging or if it's huge), set this flag. .. code-block:: python @@ -80,4 +80,7 @@ val_percent_check will be overwritten by overfit_pct if overfit_pct > 0 trainer = Trainer(val_percent_check=1.0) # check 10% only - trainer = Trainer(val_percent_check=0.1) \ No newline at end of file + trainer = Trainer(val_percent_check=0.1) + +.. note:: ``val_percent_check`` will be overwritten by ``overfit_pct`` if ``overfit_pct`` > 0 and ignored if + ``fast_dev_run=True``. 
\ No newline at end of file diff --git a/docs/source/governance.rst b/docs/source/governance.rst index ad0d41ef43836..1c4da1f4a5b73 100644 --- a/docs/source/governance.rst +++ b/docs/source/governance.rst @@ -12,4 +12,5 @@ Core Maintainers ---------------- - Nic Eggert (`neggert `_) - Jeff Ling (`jeffling `_) +- Jeremy Jordan (`jeremyjordan `_) - Tullie Murrell (`tullie `_) diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index ea58b9e6eb6d3..8aea09a77fc68 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -32,7 +32,7 @@ Now we can parametrize the LightningModule. class LitMNIST(pl.LightningModule): def __init__(self, hparams): - super(LitMNIST, self).__init__() + super().__init__() self.hparams = hparams self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) @@ -140,7 +140,7 @@ polluting the main.py file, the LightningModule lets you define arguments for ea class LitMNIST(pl.LightningModule): def __init__(self, hparams): - super(LitMNIST, self).__init__() + super().__init__() self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) @staticmethod @@ -151,7 +151,7 @@ polluting the main.py file, the LightningModule lets you define arguments for ea class GoodGAN(pl.LightningModule): def __init__(self, hparams): - super(GoodGAN, self).__init__() + super().__init__() self.encoder = Encoder(layers=hparams.encoder_layers) @staticmethod diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index c0453839519c5..ff9b1d39e4f89 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -119,7 +119,7 @@ a 3-layer neural network. class LitMNIST(pl.LightningModule): def __init__(self): - super(LitMNIST, self).__init__() + super().__init__() # mnist images are (1, 28, 28) (channels, width, height) self.layer_1 = torch.nn.Linear(28 * 28, 128) @@ -319,7 +319,7 @@ in the LightningModule def training_step(self, batch, batch_idx): x, y = batch - logits = self.forward(x) + logits = self(x) loss = F.nll_loss(logits, y) return {'loss': loss} # return loss (also works) @@ -344,7 +344,7 @@ For clarity, we'll recall that the full LightningModule now looks like this. class LitMNIST(pl.LightningModule): def __init__(self): - super(LitMNIST, self).__init__() + super().__init__() self.layer_1 = torch.nn.Linear(28 * 28, 128) self.layer_2 = torch.nn.Linear(128, 256) self.layer_3 = torch.nn.Linear(256, 10) @@ -371,7 +371,7 @@ For clarity, we'll recall that the full LightningModule now looks like this. def training_step(self, batch, batch_idx): x, y = batch - logits = self.forward(x) + logits = self(x) loss = F.nll_loss(logits, y) # add logging @@ -602,7 +602,7 @@ Now we can parametrize the LightningModule. class LitMNIST(pl.LightningModule): def __init__(self, hparams): - super(LitMNIST, self).__init__() + super().__init__() self.hparams = hparams self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) @@ -684,7 +684,7 @@ sample split in the `train_dataloader` method. class LitMNIST(pl.LightningModule): def validation_step(self, batch, batch_idx): x, y = batch - logits = self.forward(x) + logits = self(x) loss = F.nll_loss(logits, y) return {'val_loss': loss} @@ -740,7 +740,7 @@ Just like the validation loop, we define exactly the same steps for testing: class LitMNIST(pl.LightningModule): def test_step(self, batch, batch_idx): x, y = batch - logits = self.forward(x) + logits = self(x) loss = F.nll_loss(logits, y) return {'val_loss': loss} @@ -827,7 +827,7 @@ within it. 
def training_step(self, batch, batch_idx): x, y = batch - logits = self.forward(x) + logits = self(x) loss = F.nll_loss(logits, y) return loss @@ -855,7 +855,7 @@ In this case, we've set this LightningModel to predict logits. But we could also def training_step(self, batch, batch_idx): x, y = batch - out, l1_feats, l2_feats, l3_feats = self.forward(x) + out, l1_feats, l2_feats, l3_feats = self(x) logits = torch.log_softmax(out, dim=1) ce_loss = F.nll_loss(logits, y) loss = perceptual_loss(l1_feats, l2_feats, l3_feats) + ce_loss @@ -880,7 +880,7 @@ Or maybe we have a model that we use to do generation def training_step(self, batch, batch_idx): x, y = batch representation = self.encoder(x) - imgs = self.forward(representation) + imgs = self(representation) loss = perceptual_loss(imgs, x) return loss @@ -993,4 +993,3 @@ And pass the callbacks into the trainer --------- .. include:: transfer_learning.rst - diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst index 0f51a654f0d68..6b8f15b736443 100644 --- a/docs/source/multi_gpu.rst +++ b/docs/source/multi_gpu.rst @@ -207,7 +207,7 @@ to illustrate why this is needed, let's look at dataparallel def training_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(batch) + y_hat = self(batch) # on dp or ddp2 if we did softmax now it would be wrong # because batch is actually a piece of the full batch diff --git a/docs/source/transfer_learning.rst b/docs/source/transfer_learning.rst index 9737d7d820a00..d5a9509f4a014 100644 --- a/docs/source/transfer_learning.rst +++ b/docs/source/transfer_learning.rst @@ -97,7 +97,7 @@ Here's a model that uses `Huggingface transformers >> # define simple Net for MNIST dataset + >>> params = dict( + ... drop_prob=0.2, + ... batch_size=2, + ... in_features=28 * 28, + ... learning_rate=0.001 * 8, + ... optimizer_name='adam', + ... data_root='./datasets', + ... out_features=10, + ... hidden_dim=1000, + ... 
) + >>> from argparse import Namespace + >>> hparams = Namespace(**params) + >>> model = LightningTemplateModel(hparams) """ def __init__(self, hparams): @@ -28,7 +45,7 @@ def __init__(self, hparams): :param hparams: """ # init superclass - super(LightningTemplateModel, self).__init__() + super().__init__() self.hparams = hparams self.batch_size = hparams.batch_size @@ -89,7 +106,7 @@ def training_step(self, batch, batch_idx): x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) # calculate loss loss_val = self.loss(y, y_hat) @@ -116,7 +133,7 @@ def validation_step(self, batch, batch_idx): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_val = self.loss(y, y_hat) diff --git a/pl_examples/domain_templates/gan.py b/pl_examples/domain_templates/gan.py index 0d7f7834b6faf..68e6053e7e822 100644 --- a/pl_examples/domain_templates/gan.py +++ b/pl_examples/domain_templates/gan.py @@ -25,7 +25,7 @@ class Generator(nn.Module): def __init__(self, latent_dim, img_shape): - super(Generator, self).__init__() + super().__init__() self.img_shape = img_shape def block(in_feat, out_feat, normalize=True): @@ -52,7 +52,7 @@ def forward(self, z): class Discriminator(nn.Module): def __init__(self, img_shape): - super(Discriminator, self).__init__() + super().__init__() self.model = nn.Sequential( nn.Linear(int(np.prod(img_shape)), 512), @@ -73,7 +73,7 @@ def forward(self, img): class GAN(LightningModule): def __init__(self, hparams): - super(GAN, self).__init__() + super().__init__() self.hparams = hparams # networks @@ -105,7 +105,7 @@ def training_step(self, batch, batch_idx, optimizer_idx): z = z.cuda(imgs.device.index) # generate images - self.generated_imgs = self.forward(z) + self.generated_imgs = self(z) # log sampled images # sample_imgs = self.generated_imgs[:6] @@ -179,7 +179,7 @@ def on_epoch_end(self): z = z.cuda(self.last_imgs.device.index) # log sampled images - sample_imgs = self.forward(z) + sample_imgs = self(z) grid = torchvision.utils.make_grid(sample_imgs) self.logger.experiment.add_image(f'generated_images', grid, self.current_epoch) diff --git a/pl_examples/domain_templates/reinforse_learn_Qnet.py b/pl_examples/domain_templates/reinforse_learn_Qnet.py new file mode 100644 index 0000000000000..4585c108d5cfb --- /dev/null +++ b/pl_examples/domain_templates/reinforse_learn_Qnet.py @@ -0,0 +1,360 @@ +""" +# Deep Reinforcement Learning: Deep Q-network (DQN) + +This example is based on https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On- +Second-Edition/blob/master/Chapter06/02_dqn_pong.py + +The template illustrates using Lightning for Reinforcement Learning. The example builds a basic DQN using the +classic CartPole environment. + +To run the template, just run: +python reinforse_learn_Qnet.py + +After ~1500 steps, you will see the total_reward hitting the max score of 200. Open up TensorBoard to +see the metrics. 
+ +tensorboard --logdir default +""" + +import pytorch_lightning as pl + +from typing import Iterator, List, Tuple + +import argparse +from collections import OrderedDict, deque, namedtuple + +import gym +import numpy as np +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim import Optimizer +from torch.utils.data import DataLoader +from torch.utils.data.dataset import IterableDataset + + +class DQN(nn.Module): + """ + Simple MLP network + + Args: + obs_size: observation/state size of the environment + n_actions: number of discrete actions available in the environment + hidden_size: size of hidden layers + """ + + def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128): + super().__init__() + self.net = nn.Sequential( + nn.Linear(obs_size, hidden_size), + nn.ReLU(), + nn.Linear(hidden_size, n_actions) + ) + + def forward(self, x): + return self.net(x.float()) + + +# Named tuple for storing experience steps gathered in training +Experience = namedtuple( + 'Experience', field_names=['state', 'action', 'reward', + 'done', 'new_state']) + + +class ReplayBuffer: + """ + Replay Buffer for storing past experiences allowing the agent to learn from them + + Args: + capacity: size of the buffer + """ + + def __init__(self, capacity: int) -> None: + self.buffer = deque(maxlen=capacity) + + def __len__(self) -> int: + return len(self.buffer) + + def append(self, experience: Experience) -> None: + """ + Add experience to the buffer + + Args: + experience: tuple (state, action, reward, done, new_state) + """ + self.buffer.append(experience) + + def sample(self, batch_size: int) -> Tuple: + indices = np.random.choice(len(self.buffer), batch_size, replace=False) + states, actions, rewards, dones, next_states = zip(*[self.buffer[idx] for idx in indices]) + + return (np.array(states), np.array(actions), np.array(rewards, dtype=np.float32), + np.array(dones, dtype=np.bool), np.array(next_states)) + + +class RLDataset(IterableDataset): + """ + Iterable Dataset containing the ExperienceBuffer + which will be updated with new experiences during training + + Args: + buffer: replay buffer + sample_size: number of experiences to sample at a time + """ + + def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None: + self.buffer = buffer + self.sample_size = sample_size + + def __iter__(self) -> Iterator[Tuple]: + states, actions, rewards, dones, new_states = self.buffer.sample(self.sample_size) + for i in range(len(dones)): + yield states[i], actions[i], rewards[i], dones[i], new_states[i] + + +class Agent: + """ + Base Agent class handling the interaction with the environment + + Args: + env: training environment + replay_buffer: replay buffer storing experiences + """ + + def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None: + self.env = env + self.replay_buffer = replay_buffer + self.reset() + self.state = self.env.reset() + + def reset(self) -> None: + """ Resets the environment and updates the state""" + self.state = self.env.reset() + + def get_action(self, net: nn.Module, epsilon: float, device: str) -> int: + """ + Using the given network, decide what action to carry out + using an epsilon-greedy policy + + Args: + net: DQN network + epsilon: value to determine likelihood of taking a random action + device: current device + + Returns: + action + """ + if np.random.random() < epsilon: + action = self.env.action_space.sample() + else: + state = torch.tensor([self.state]) + + if device not in ['cpu']: + state = state.cuda(device) + + 
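+            # exploit: score each action with the network and act greedily via the argmax over Q-values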
q_values = net(state) + _, action = torch.max(q_values, dim=1) + action = int(action.item()) + + return action + + @torch.no_grad() + def play_step(self, net: nn.Module, epsilon: float = 0.0, device: str = 'cpu') -> Tuple[float, bool]: + """ + Carries out a single interaction step between the agent and the environment + + Args: + net: DQN network + epsilon: value to determine likelihood of taking a random action + device: current device + + Returns: + reward, done + """ + + action = self.get_action(net, epsilon, device) + + # do step in the environment + new_state, reward, done, _ = self.env.step(action) + + exp = Experience(self.state, action, reward, done, new_state) + + self.replay_buffer.append(exp) + + self.state = new_state + if done: + self.reset() + return reward, done + + +class DQNLightning(pl.LightningModule): + """ Basic DQN Model """ + + def __init__(self, hparams: argparse.Namespace) -> None: + super().__init__() + self.hparams = hparams + + self.env = gym.make(self.hparams.env) + obs_size = self.env.observation_space.shape[0] + n_actions = self.env.action_space.n + + self.net = DQN(obs_size, n_actions) + self.target_net = DQN(obs_size, n_actions) + + self.buffer = ReplayBuffer(self.hparams.replay_size) + self.agent = Agent(self.env, self.buffer) + self.total_reward = 0 + self.episode_reward = 0 + self.populate(self.hparams.warm_start_steps) + + def populate(self, steps: int = 1000) -> None: + """ + Carries out several random steps through the environment to initially fill + up the replay buffer with experiences + + Args: + steps: number of random steps to populate the buffer with + """ + for i in range(steps): + self.agent.play_step(self.net, epsilon=1.0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Passes in a state x through the network and gets the q_values of each action as an output + + Args: + x: environment state + + Returns: + q values + """ + output = self.net(x) + return output + + def dqn_mse_loss(self, batch: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: + """ + Calculates the mse loss using a mini batch from the replay buffer + + Args: + batch: current mini batch of replay data + + Returns: + loss + """ + states, actions, rewards, dones, next_states = batch + + state_action_values = self.net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1) + + with torch.no_grad(): + next_state_values = self.target_net(next_states).max(1)[0] + next_state_values[dones] = 0.0 + next_state_values = next_state_values.detach() + + expected_state_action_values = next_state_values * self.hparams.gamma + rewards + + return nn.MSELoss()(state_action_values, expected_state_action_values) + + def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> OrderedDict: + """ + Carries out a single step through the environment to update the replay buffer. 
+ Then calculates loss based on the minibatch received + + Args: + batch: current mini batch of replay data + nb_batch: batch number + + Returns: + Training loss and log metrics + """ + device = self.get_device(batch) + epsilon = max(self.hparams.eps_end, self.hparams.eps_start - + (self.global_step + 1) / self.hparams.eps_last_frame) + + # step through environment with agent + reward, done = self.agent.play_step(self.net, epsilon, device) + self.episode_reward += reward + + # calculates training loss + loss = self.dqn_mse_loss(batch) + + if self.trainer.use_dp or self.trainer.use_ddp2: + loss = loss.unsqueeze(0) + + if done: + self.total_reward = self.episode_reward + self.episode_reward = 0 + + # Sync the target network with the online network (hard update) + if self.global_step % self.hparams.sync_rate == 0: + self.target_net.load_state_dict(self.net.state_dict()) + + log = {'total_reward': torch.tensor(self.total_reward).to(device), + 'reward': torch.tensor(reward).to(device), + 'steps': torch.tensor(self.global_step).to(device)} + + return OrderedDict({'loss': loss, 'log': log, 'progress_bar': log}) + + def configure_optimizers(self) -> List[Optimizer]: + """ Initialize Adam optimizer""" + optimizer = optim.Adam(self.net.parameters(), lr=self.hparams.lr) + return [optimizer] + + def __dataloader(self) -> DataLoader: + """Initialize the Replay Buffer dataset used for retrieving experiences""" + dataset = RLDataset(self.buffer, self.hparams.episode_length) + dataloader = DataLoader(dataset=dataset, + batch_size=self.hparams.batch_size, + sampler=None + ) + return dataloader + + def train_dataloader(self) -> DataLoader: + """Get train loader""" + return self.__dataloader() + + def get_device(self, batch) -> str: + """Retrieve device currently being used by minibatch""" + return batch[0].device.index if self.on_gpu else 'cpu' + + +def main(hparams) -> None: + model = DQNLightning(hparams) + + trainer = pl.Trainer( + gpus=1, + distributed_backend='dp', + early_stop_callback=False, + val_check_interval=100 + ) + + trainer.fit(model) + + +if __name__ == '__main__': + torch.manual_seed(0) + np.random.seed(0) + + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int, default=16, help="size of the batches") + parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") + parser.add_argument("--env", type=str, default="CartPole-v0", help="gym environment tag") + parser.add_argument("--gamma", type=float, default=0.99, help="discount factor") + parser.add_argument("--sync_rate", type=int, default=10, + help="how many frames between syncing the target network") + parser.add_argument("--replay_size", type=int, default=1000, + help="capacity of the replay buffer") + parser.add_argument("--warm_start_size", type=int, default=1000, + help="how many samples do we use to fill our buffer at the start of training") + parser.add_argument("--eps_last_frame", type=int, default=1000, + help="what frame should epsilon stop decaying") + parser.add_argument("--eps_start", type=float, default=1.0, help="starting value of epsilon") + parser.add_argument("--eps_end", type=float, default=0.01, help="final value of epsilon") + parser.add_argument("--episode_length", type=int, default=200, help="max length of an episode") + parser.add_argument("--max_episode_reward", type=int, default=200, + help="max episode reward in the environment") + parser.add_argument("--warm_start_steps", type=int, default=1000, + help="how many random steps to populate the buffer with at the start of training") + + args = parser.parse_args() + + main(args)
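The two computations at the heart of `training_step` above are easy to sanity-check in isolation: epsilon decays linearly from `eps_start` to `eps_end` over `eps_last_frame` steps, and `dqn_mse_loss` regresses Q(s, a) onto the one-step Bellman target r + gamma * max_a' Q_target(s', a'). Below is a minimal, self-contained sketch of both pieces; the toy linear networks and hand-built batch are illustrative stand-ins, not part of the example file above.

```python
import torch
import torch.nn as nn

# Linear epsilon schedule, as in training_step (illustrative values).
eps_start, eps_end, eps_last_frame = 1.0, 0.01, 1000
for global_step in (0, 500, 2000):
    epsilon = max(eps_end, eps_start - (global_step + 1) / eps_last_frame)
    print(epsilon)  # ~1.0, then ~0.5, then clamped at 0.01

# One-step TD target, as in dqn_mse_loss, on a toy batch of 3 transitions.
net, target_net = nn.Linear(4, 2), nn.Linear(4, 2)  # 4-dim state, 2 actions
states, next_states = torch.randn(3, 4), torch.randn(3, 4)
actions = torch.tensor([0, 1, 0])                  # actions actually taken
rewards = torch.tensor([1.0, 0.0, 1.0])
dones = torch.tensor([False, True, False])
gamma = 0.99

# Q(s, a) for the actions actually taken, selected via gather.
state_action_values = net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)

# Bellman target r + gamma * max_a' Q_target(s', a'), zeroed on terminal states.
with torch.no_grad():
    next_state_values = target_net(next_states).max(1)[0]
    next_state_values[dones] = 0.0
expected = rewards + gamma * next_state_values

loss = nn.MSELoss()(state_action_values, expected)
```

Evaluating the target with a separate, periodically synced `target_net` (see `sync_rate`) keeps the regression target stable while the online network is being updated, which is the standard DQN stabilization trick.

diff --git 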
a/pl_examples/full_examples/imagenet/imagenet_example.py b/pl_examples/full_examples/imagenet/imagenet_example.py index 646d092ddb54d..ad8f90f5a10b6 100644 --- a/pl_examples/full_examples/imagenet/imagenet_example.py +++ b/pl_examples/full_examples/imagenet/imagenet_example.py @@ -33,7 +33,7 @@ def __init__(self, hparams): """ TODO: add docstring here """ - super(ImageNetLightningModel, self).__init__() + super().__init__() self.hparams = hparams self.model = models.__dict__[self.hparams.arch](pretrained=self.hparams.pretrained) @@ -42,7 +42,7 @@ def forward(self, x): def training_step(self, batch, batch_idx): images, target = batch - output = self.forward(images) + output = self(images) loss_val = F.cross_entropy(output, target) acc1, acc5 = self.__accuracy(output, target, topk=(1, 5)) @@ -65,7 +65,7 @@ def training_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx): images, target = batch - output = self.forward(images) + output = self(images) loss_val = F.cross_entropy(output, target) acc1, acc5 = self.__accuracy(output, target, topk=(1, 5)) diff --git a/pl_examples/full_examples/semantic_segmentation/models/unet/model.py b/pl_examples/full_examples/semantic_segmentation/models/unet/model.py index 36890aa95c9c0..484c6982c7c01 100644 --- a/pl_examples/full_examples/semantic_segmentation/models/unet/model.py +++ b/pl_examples/full_examples/semantic_segmentation/models/unet/model.py @@ -9,9 +9,9 @@ class UNet(nn.Module): Link - https://arxiv.org/abs/1505.04597 Parameters: - num_classes (int) - Number of output classes required (default 19 for KITTI dataset) - bilinear (bool) - Whether to use bilinear interpolation or transposed - convolutions for upsampling. + num_classes (int) - Number of output classes required (default 19 for KITTI dataset) + bilinear (bool) - Whether to use bilinear interpolation or transposed + convolutions for upsampling. 
''' def __init__(self, num_classes=19, bilinear=False): diff --git a/pl_examples/full_examples/semantic_segmentation/semseg.py b/pl_examples/full_examples/semantic_segmentation/semseg.py index 8f25243cffb96..dfac9e9cd64de 100644 --- a/pl_examples/full_examples/semantic_segmentation/semseg.py +++ b/pl_examples/full_examples/semantic_segmentation/semseg.py @@ -123,7 +123,7 @@ class SegModel(pl.LightningModule): ''' def __init__(self, hparams): - super(SegModel, self).__init__() + super().__init__() self.root_path = hparams.root self.batch_size = hparams.batch_size self.learning_rate = hparams.lr @@ -143,7 +143,7 @@ def training_step(self, batch, batch_nb): img, mask = batch img = img.float() mask = mask.long() - out = self.forward(img) + out = self(img) loss_val = F.cross_entropy(out, mask, ignore_index=250) return {'loss': loss_val} diff --git a/pl_examples/requirements.txt b/pl_examples/requirements.txt index d9f4c0d808165..24506bbba7964 100644 --- a/pl_examples/requirements.txt +++ b/pl_examples/requirements.txt @@ -1 +1,2 @@ -torchvision>=0.4.0 \ No newline at end of file +torchvision>=0.4.0 +gym>=0.17.0 \ No newline at end of file diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index ff03a2d32ee61..c30f3b5ed37ba 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -74,7 +74,7 @@ class LitModel(pl.LightningModule): def __init__(self): - super(LitModel, self).__init__() + super().__init__() self.l1 = torch.nn.Linear(28 * 28, 10) def forward(self, x): @@ -82,7 +82,7 @@ def forward(self, x): def training_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'loss': F.cross_entropy(y_hat, y)} def train_dataloader(self): @@ -159,7 +159,7 @@ def configure_optimizers(self): class LitModel(pl.LightningModule): def validation_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'val_loss': F.cross_entropy(y_hat, y)} def validation_epoch_end(self, outputs): @@ -178,7 +178,7 @@ def val_dataloader(self): class LitModel(pl.LightningModule): def test_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'test_loss': F.cross_entropy(y_hat, y)} def test_epoch_end(self, outputs): diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 38a4953cbbc2e..9cb5171d9f603 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -7,8 +7,8 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch -from torch import Tensor import torch.distributed as torch_distrib +from torch import Tensor from torch.nn.parallel import DistributedDataParallel from torch.optim import Adam from torch.optim.optimizer import Optimizer @@ -33,7 +33,7 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): def __init__(self, *args, **kwargs): - super(LightningModule, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) #: Current dtype self.dtype = torch.FloatTensor @@ -97,7 +97,7 @@ def forward(self, *args, **kwargs): Same as torch.nn.Module.forward(), however in Lightning you want this to define the operations you want to use for prediction (ie: on a server or as a feature extractor). - Normally you'd call self.forward() from your training_step() method. + Normally you'd call self() from your training_step() method. 
This makes it easy to write a complex system for training with the outputs you'd want in a prediction setting. @@ -117,7 +117,7 @@ def forward(self, x): def training_step(self, batch, batch_idx): x, y = batch - feature_maps = self.forward(x) + feature_maps = self(x) logits = self.classifier(feature_maps) # ... @@ -171,7 +171,7 @@ def training_step(self, batch, batch_idx): x, y, z = batch # implement your own - out = self.forward(x) + out = self(x) loss = self.loss(out, x) logger_logs = {'training_loss': loss} # optional (MUST ALL BE TENSORS) @@ -220,6 +220,10 @@ def training_step(self, batch, batch_idx, hiddens): You can also return a -1 instead of a dict to stop the current loop. This is useful if you want to break out of the current training epoch early. + + Notes: + The loss value shown in the progress bar is smoothed (averaged) over the last values, + so it differs from the actual loss returned in the train/validation step. """ def training_end(self, *args, **kwargs): @@ -266,7 +270,7 @@ def training_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) loss = self.softmax(out) loss = nce_loss(loss) return {'loss': loss} @@ -277,7 +281,7 @@ def training_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) return {'out': out} def training_step_end(self, outputs): @@ -303,7 +307,7 @@ def validation_step(self, *args, **kwargs) -> Dict[str, Tensor]: val_outs = [] for val_batch in val_data: out = validation_step(val_batch) - val_outs.append(out + val_outs.append(out) validation_epoch_end(val_outs) Args: @@ -342,7 +346,7 @@ def validation_step(self, batch, batch_idx): x, y = batch # implement your own - out = self.forward(x) + out = self(x) loss = self.loss(out, y) # log 6 example images @@ -413,7 +417,7 @@ def validation_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) loss = self.softmax(out) loss = nce_loss(loss) return {'loss': loss} @@ -424,7 +428,7 @@ def validation_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) return {'out': out} def validation_epoch_end(self, outputs): @@ -564,7 +568,7 @@ def test_step(self, batch, batch_idx): x, y = batch # implement your own - out = self.forward(x) + out = self(x) loss = self.loss(out, y) # log 6 example images @@ -636,7 +640,7 @@ def test_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) loss = self.softmax(out) loss = nce_loss(loss) return {'loss': loss} @@ -647,7 +651,7 @@ def test_step(self, batch, batch_idx): # batch is 1/num_gpus big x, y = batch - out = self.forward(x) + out = self(x) return {'out': out} def test_step_end(self, outputs): diff --git a/pytorch_lightning/logging/comet.py b/pytorch_lightning/logging/comet.py index 48a426dd4d53e..3e09a1cfc1979 100644 --- a/pytorch_lightning/logging/comet.py +++ b/pytorch_lightning/logging/comet.py @@ -2,4 +2,9 @@ .. warning:: `logging` package has been renamed to `loggers` since v0.7.0 and will be removed in v0.9.0 """ +import warnings + +warnings.warn("`logging.comet` module has been renamed to `loggers.comet` since v0.7.0." 
+ " The deprecated module name will be removed in v0.9.0.", DeprecationWarning) + from pytorch_lightning.loggers.comet import CometLogger # noqa: F403 diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index 895f41fc5175a..c91faec47f995 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -2,4 +2,9 @@ .. warning:: `logging` package has been renamed to `loggers` since v0.7.0 and will be removed in v0.9.0 """ +import warnings + +warnings.warn("`logging.mlflow` module has been renamed to `loggers.mlflow` since v0.7.0." + " The deprecated module name will be removed in v0.9.0.", DeprecationWarning) + from pytorch_lightning.loggers.mlflow import MLFlowLogger # noqa: F403 diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index f1b64525fe160..f1e8a81b55180 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -2,4 +2,9 @@ .. warning:: `logging` package has been renamed to `loggers` since v0.7.0 and will be removed in v0.9.0 """ +import warnings + +warnings.warn("`logging.neptune` module has been renamed to `loggers.neptune` since v0.7.0." + " The deprecated module name will be removed in v0.9.0.", DeprecationWarning) + from pytorch_lightning.loggers.neptune import NeptuneLogger # noqa: F403 diff --git a/pytorch_lightning/logging/test_tube.py b/pytorch_lightning/logging/test_tube.py index a9bc71e4885dd..c40b7d187ab1b 100644 --- a/pytorch_lightning/logging/test_tube.py +++ b/pytorch_lightning/logging/test_tube.py @@ -2,4 +2,9 @@ .. warning:: `logging` package has been renamed to `loggers` since v0.7.0 and will be removed in v0.9.0 """ +import warnings + +warnings.warn("`logging.test_tube` module has been renamed to `loggers.test_tube` since v0.7.0." + " The deprecated module name will be removed in v0.9.0.", DeprecationWarning) + from pytorch_lightning.loggers.test_tube import TestTubeLogger # noqa: F403 diff --git a/pytorch_lightning/logging/wandb.py b/pytorch_lightning/logging/wandb.py index e4527b7b8734a..0ce8679215b9a 100644 --- a/pytorch_lightning/logging/wandb.py +++ b/pytorch_lightning/logging/wandb.py @@ -2,4 +2,9 @@ .. warning:: `logging` package has been renamed to `loggers` since v0.7.0 and will be removed in v0.9.0 """ +import warnings + +warnings.warn("`logging.wandb` module has been renamed to `loggers.wandb` since v0.7.0." 
+ " The deprecated module name will be removed in v0.9.0.", DeprecationWarning) + from pytorch_lightning.loggers.wandb import WandbLogger # noqa: F403 diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 881a2e9103301..8a4d4dc6cd3e0 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -876,7 +876,8 @@ def run_pretrain_routine(self, model: LightningModule): return # check if we should run validation during training - self.disable_validation = not self.is_overriden('validation_step') and not self.fast_dev_run + self.disable_validation = not (self.is_overriden('validation_step') and self.val_percent_check > 0) \ + and not self.fast_dev_run # run tiny validation (if validation defined) # to make sure program won't crash during val @@ -908,7 +909,7 @@ def run_pretrain_routine(self, model: LightningModule): # init progress bar pbar = tqdm(leave=True, position=2 * self.process_position, disable=not self.show_progress_bar, dynamic_ncols=True, - file=sys.stdout) + file=sys.stdout, smoothing=0) self.main_progress_bar = pbar # clear cache before training diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 7d0f22def1197..99264b7b5b441 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -17,7 +17,7 @@ # DEFAULTS used by the Trainer checkpoint_callback = ModelCheckpoint( filepath=os.getcwd(), - save_best_only=True, + save_top_k=1, verbose=True, monitor='val_loss', mode='min', diff --git a/requirements-extra.txt b/requirements-extra.txt index c3d1d6b28ce64..8b720f7dc33e6 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -1,6 +1,8 @@ +# extended list of package dependencies to reach full functionality + neptune-client>=0.4.4 comet-ml>=1.0.56 mlflow>=1.0.0 test_tube>=0.7.5 wandb>=0.8.21 -trains>=0.14.1rc0 +trains>=0.14.1 diff --git a/requirements.txt b/requirements.txt index 6d99913a60f4d..81441b367e36c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +# the default package dependencies + tqdm>=4.41.0 numpy>=1.16.4 torch>=1.1 diff --git a/setup.cfg b/setup.cfg index 264706053ec51..8759d159fd0a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,7 @@ norecursedirs = build python_files = test_*.py -doctest_plus = disabled +# doctest_plus = disabled addopts = --strict markers = slow @@ -13,10 +13,6 @@ markers = filterwarnings gpus_param_tests -[pycodestyle] -ignore = E731,W504 -max-line-length = 120 - [coverage:report] exclude_lines = pragma: no-cover @@ -41,7 +37,7 @@ ignore = # setup.cfg or tox.ini [check-manifest] ignore = - .travis.yml + *.yml tox.ini .github .github/* @@ -51,3 +47,10 @@ ignore = license_file = LICENSE # long_description = file:README.md # long_description_content_type = text/markdown + +[pydocstyle] +convention = pep257 +# D104, D107: Ignore missing docstrings in __init__ files and methods. 
+# D202: Ignore a blank line after docstring (collision with Python Black in decorators) +add-ignore = D104,D107,D202 +max-line-length = 120 diff --git a/tests/Dockerfile b/tests/Dockerfile new file mode 100644 index 0000000000000..d0d2f4ca5abb2 --- /dev/null +++ b/tests/Dockerfile @@ -0,0 +1,7 @@ +ARG TORCH_VERSION=1.4 +ARG CUDA_VERSION=10.1 + +FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn7-runtime + +# Install AMP +RUN bash ./tests/install_AMP.sh diff --git a/tests/README.md b/tests/README.md index 8835ab93922eb..0773c717d97eb 100644 --- a/tests/README.md +++ b/tests/README.md @@ -13,8 +13,8 @@ To run all tests do the following: git clone https://github.com/PyTorchLightning/pytorch-lightning cd pytorch-lightning -# install module locally -pip install -e . +# install AMP support +bash tests/install_AMP.sh # install dev deps pip install -r tests/requirements.txt @@ -36,15 +36,13 @@ Make sure to run coverage on a GPU machine with at least 2 GPUs and NVIDIA apex cd pytorch-lightning # generate coverage (coverage is also installed as part of dev dependencies under tests/requirements.txt) -pip install coverage coverage run --source pytorch_lightning -m py.test pytorch_lightning tests examples -v --doctest-modules # print coverage stats coverage report -m -# exporting resulys +# exporting results coverage xml -codecov -t 17327163-8cca-4a5d-86c8-ca5f2ef700bc -v ``` diff --git a/tests/base/__init__.py b/tests/base/__init__.py new file mode 100644 index 0000000000000..1e68469871d25 --- /dev/null +++ b/tests/base/__init__.py @@ -0,0 +1,57 @@ +"""Models for testing.""" + +import torch + +from tests.base.models import TestModelBase, DictHparamsModel +from tests.base.mixins import ( + LightEmptyTestStep, + LightValidationStepMixin, + LightValidationMixin, + LightValidationStepMultipleDataloadersMixin, + LightValidationMultipleDataloadersMixin, + LightTestStepMixin, + LightTestMixin, + LightTestStepMultipleDataloadersMixin, + LightTestMultipleDataloadersMixin, + LightTestFitSingleTestDataloadersMixin, + LightTestFitMultipleTestDataloadersMixin, + LightValStepFitSingleDataloaderMixin, + LightValStepFitMultipleDataloadersMixin, + LightTrainDataloader, + LightTestDataloader, + LightInfTrainDataloader, + LightInfValDataloader, + LightInfTestDataloader, + LightTestOptimizerWithSchedulingMixin, + LightTestMultipleOptimizersWithSchedulingMixin, + LightTestOptimizersWithMixedSchedulingMixin, + LightTestReduceLROnPlateauMixin +) + + +class LightningTestModel(LightTrainDataloader, + LightValidationMixin, + LightTestMixin, + TestModelBase): + """Most common test case. 
Validation and test dataloaders.""" + + def on_training_metrics(self, logs): + logs['some_tensor_to_test'] = torch.rand(1) + + +class LightningTestModelWithoutHyperparametersArg(LightningTestModel): + """ without hparams argument in constructor """ + + def __init__(self): + import tests.base.utils as tutils + + # the user loads the hparams in some other way + hparams = tutils.get_default_hparams() + super().__init__(hparams) + + +class LightningTestModelWithUnusedHyperparametersArg(LightningTestModelWithoutHyperparametersArg): + """ has hparams argument in constructor but is not used """ + + def __init__(self, hparams): + super().__init__() diff --git a/tests/models/debug.py b/tests/base/debug.py similarity index 88% rename from tests/models/debug.py rename to tests/base/debug.py index 3c200a52f2644..59abdd8503bde 100644 --- a/tests/models/debug.py +++ b/tests/base/debug.py @@ -7,14 +7,14 @@ # from test_models import assert_ok_test_acc, load_model, \ -# clear_save_dir, get_test_tube_logger, get_hparams, init_save_dir, \ +# clear_save_dir, get_default_testtube_logger, get_default_hparams, init_save_dir, \ # init_checkpoint_callback, reset_seed, set_random_master_port class CoolModel(pl.LightningModule): def __init(self): - super(CoolModel, self).__init__() + super().__init__() # not the best model... self.l1 = torch.nn.Linear(28 * 28, 10) @@ -26,12 +26,12 @@ def my_loss(self, y_hat, y): def training_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'training_loss': self.my_loss(y_hat, y)} def validation_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'val_loss': self.my_loss(y_hat, y)} def validation_epoch_end(self, outputs): diff --git a/tests/models/mixins.py b/tests/base/mixins.py similarity index 98% rename from tests/models/mixins.py rename to tests/base/mixins.py index 0be691726e209..1a05049f44f5f 100644 --- a/tests/models/mixins.py +++ b/tests/base/mixins.py @@ -21,7 +21,7 @@ def validation_step(self, batch, batch_idx, *args, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_val = self.loss(y, y_hat) @@ -114,7 +114,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_val = self.loss(y, y_hat) @@ -273,7 +273,7 @@ def test_step(self, batch, batch_idx, *args, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_test = self.loss(y, y_hat) @@ -360,7 +360,7 @@ def test_step(self, batch, batch_idx, dataloader_idx, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_test = self.loss(y, y_hat) @@ -413,7 +413,7 @@ def test_step(self, batch, batch_idx, *args, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_test = self.loss(y, y_hat) @@ -460,7 +460,7 @@ def test_step(self, batch, batch_idx, dataloader_idx, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_test = self.loss(y, y_hat) @@ -512,7 +512,7 @@ def validation_step(self, batch, batch_idx, *args, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_val = self.loss(y, y_hat) @@ -558,7 +558,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx, **kwargs): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) 
+ y_hat = self(x) loss_val = self.loss(y, y_hat) diff --git a/tests/models/base.py b/tests/base/models.py similarity index 77% rename from tests/models/base.py rename to tests/base/models.py index 0e8e60392fdea..1b60f3b20c27f 100644 --- a/tests/models/base.py +++ b/tests/base/models.py @@ -1,5 +1,6 @@ import os from collections import OrderedDict +from typing import Dict import torch import torch.nn as nn @@ -8,7 +9,6 @@ from torch.utils.data import DataLoader from torchvision import transforms from torchvision.datasets import MNIST -from typing import Dict try: from test_tube import HyperOptArgumentParser @@ -45,7 +45,7 @@ def __init__(self, root, train=True, transform=None, target_transform=None, class DictHparamsModel(LightningModule): def __init__(self, hparams: Dict): - super(DictHparamsModel, self).__init__() + super().__init__() self.hparams = hparams self.l1 = torch.nn.Linear(hparams.get('in_features'), hparams['out_features']) @@ -54,7 +54,7 @@ def forward(self, x): def training_step(self, batch, batch_idx): x, y = batch - y_hat = self.forward(x) + y_hat = self(x) return {'loss': F.cross_entropy(y_hat, y)} def configure_optimizers(self): @@ -140,7 +140,7 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) # calculate loss loss_val = self.loss(y, y_hat) @@ -174,9 +174,8 @@ def configure_optimizers(self): optimizer = optim.LBFGS(self.parameters(), lr=self.hparams.learning_rate) else: optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) - - # test returning only 1 list instead of 2 - return optimizer + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10) + return [optimizer], [scheduler] def prepare_data(self): transform = transforms.Compose([transforms.ToTensor(), @@ -201,36 +200,3 @@ def _dataloader(self, train): ) return loader - - @staticmethod - def add_model_specific_args(parent_parser, root_dir): # pragma: no-cover - """ - Parameters you define here will be available to your model through self.hparams - :param parent_parser: - :param root_dir: - :return: - """ - parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser]) - - # param overwrites - # parser.set_defaults(gradient_clip_val=5.0) - - # network params - parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False) - parser.add_argument('--in_features', default=28 * 28, type=int) - parser.add_argument('--out_features', default=10, type=int) - # use 500 for CPU, 50000 for GPU to see speed difference - parser.add_argument('--hidden_dim', default=50000, type=int) - # data - parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str) - # training params (opt) - parser.opt_list('--learning_rate', default=0.001 * 8, type=float, - options=[0.0001, 0.0005, 0.001, 0.005], tunable=False) - parser.opt_list('--optimizer_name', default='adam', type=str, - options=['adam'], tunable=False) - # if using 2 nodes with 4 gpus each the batch size here - # (256) will be 256 / (2*8) = 16 per gpu - parser.opt_list('--batch_size', default=256 * 8, type=int, - options=[32, 64, 128, 256], tunable=False, - help='batch size will be divided over all GPUs being used across all nodes') - return parser diff --git a/tests/models/utils.py b/tests/base/utils.py similarity index 91% rename from tests/models/utils.py rename to tests/base/utils.py index 2f971162fcf5b..c6b8e3ceaf67a 100644 --- a/tests/models/utils.py +++ b/tests/base/utils.py 
@@ -5,11 +5,11 @@ import numpy as np import torch -from pl_examples import LightningTemplateModel +# from pl_examples import LightningTemplateModel from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.loggers import TestTubeLogger, TensorBoardLogger -from tests.models import LightningTestModel +from tests.base import LightningTestModel # generate a list of random seeds for each test RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000)) @@ -21,7 +21,7 @@ def run_model_test_no_loggers(trainer_options, model, min_acc=0.50): - save_dir = trainer_options['default_save_path'] + # save_dir = trainer_options['default_save_path'] # fit model trainer = Trainer(**trainer_options) @@ -53,7 +53,7 @@ def run_model_test(trainer_options, model, on_gpu=True): save_dir = trainer_options['default_save_path'] # logger file to get meta - logger = get_test_tube_logger(save_dir, False) + logger = get_default_testtube_logger(save_dir, False) # logger file to get weights checkpoint = init_checkpoint_callback(logger) @@ -89,7 +89,7 @@ def run_model_test(trainer_options, model, on_gpu=True): trainer.hpc_load(save_dir, on_gpu=on_gpu) -def get_hparams(continue_training=False, hpc_exp_number=0): +def get_default_hparams(continue_training=False, hpc_exp_number=0): tests_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) args = { @@ -111,22 +111,19 @@ def get_hparams(continue_training=False, hpc_exp_number=0): return hparams -def get_model(use_test_model=False, lbfgs=False): +def get_default_model(lbfgs=False): # set up model with these hyperparams - hparams = get_hparams() + hparams = get_default_hparams() if lbfgs: setattr(hparams, 'optimizer_name', 'lbfgs') setattr(hparams, 'learning_rate', 0.002) - if use_test_model: - model = LightningTestModel(hparams) - else: - model = LightningTemplateModel(hparams) + model = LightningTestModel(hparams) return model, hparams -def get_test_tube_logger(save_dir, debug=True, version=None): +def get_default_testtube_logger(save_dir, debug=True, version=None): # set up logger object without actually saving logs logger = TestTubeLogger(save_dir, name='lightning_logs', debug=debug, version=version) return logger @@ -150,7 +147,7 @@ def get_data_path(expt_logger, path_dir=None): return path_expt -def load_model(exp, root_weights_dir, module_class=LightningTemplateModel, path_expt=None): +def load_model(exp, root_weights_dir, module_class=LightningTestModel, path_expt=None): # load trained model path_expt_dir = get_data_path(exp, path_dir=path_expt) tags_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_CSV_TAGS) @@ -168,7 +165,7 @@ def load_model(exp, root_weights_dir, module_class=LightningTemplateModel, path_ return trained_model -def load_model_from_checkpoint(root_weights_dir, module_class=LightningTemplateModel): +def load_model_from_checkpoint(root_weights_dir, module_class=LightningTestModel): # load trained model checkpoints = [x for x in os.listdir(root_weights_dir) if '.ckpt' in x] weights_dir = os.path.join(root_weights_dir, checkpoints[0]) @@ -182,7 +179,7 @@ def load_model_from_checkpoint(root_weights_dir, module_class=LightningTemplateM return trained_model -def run_prediction(dataloader, trained_model, dp=False, min_acc=0.45): +def run_prediction(dataloader, trained_model, dp=False, min_acc=0.35): # run prediction on 1 batch for batch in dataloader: break diff --git a/tests/collect_env_details.py b/tests/collect_env_details.py new file mode 100644 index 0000000000000..957397f3bb63f 
--- /dev/null
+++ b/tests/collect_env_details.py
@@ -0,0 +1,98 @@
+"""Diagnose your system and show basic information
+
+This script mainly collects detailed environment information for better bug reporting.
+
+"""
+
+import os
+import re
+import sys
+import platform
+
+import numpy
+import tensorboard
+import torch
+import tqdm
+
+sys.path += [os.path.abspath('..'), os.path.abspath('.')]
+import pytorch_lightning  # noqa: E402
+
+LEVEL_OFFSET = '\t'
+KEY_PADDING = 20
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Runs command using run_lambda, returns the first regex match if it exists"""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    match = re.search(regex, out)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def get_running_cuda_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'nvcc --version', r'V(.*)$')
+
+
+def info_system():
+    return {
+        'OS': platform.system(),
+        'architecture': platform.architecture(),
+        'version': platform.version(),
+        'processor': platform.processor(),
+        'python': platform.python_version(),
+    }
+
+
+def info_cuda():
+    return {
+        'GPU': set([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]),
+        # 'nvidia_driver': get_nvidia_driver_version(run_lambda),
+        'available': torch.cuda.is_available(),
+        'version': torch.version.cuda,
+    }
+
+
+def info_packages():
+    return {
+        'numpy': numpy.__version__,
+        "pyTorch_version": torch.__version__,
+        'pyTorch_debug': torch.version.debug,
+        'pytorch-lightning': pytorch_lightning.__version__,
+        'tensorboard': tensorboard.__version__,
+        'tqdm': tqdm.__version__,
+    }
+
+
+def nice_print(details, level=0):
+    lines = []
+    for k in sorted(details):
+        key = f'{k}:'
+        if isinstance(details[k], dict):
+            lines += [level * LEVEL_OFFSET + key]
+            lines += nice_print(details[k], level + 1)
+        elif isinstance(details[k], (set, list, tuple)):
+            lines += [level * LEVEL_OFFSET + key]
+            lines += [(level + 1) * LEVEL_OFFSET + v for v in details[k]]
+        else:
+            template = '{:%is} {}' % KEY_PADDING
+            key_val = template.format(key, details[k])
+            lines += [(level * LEVEL_OFFSET) + key_val]
+    return lines
+
+
+def main():
+    details = {
+        "system": info_system(),
+        'cuda': info_cuda(),
+        'packages': info_packages(),
+    }
+    lines = nice_print(details)
+    text = os.linesep.join(lines)
+    print(text)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/install_AMP.sh b/tests/install_AMP.sh
new file mode 100644
index 0000000000000..2c56bb25b742b
--- /dev/null
+++ b/tests/install_AMP.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+ROOT=$PWD
+git clone https://github.com/NVIDIA/apex
+cd apex
+pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
+pip install -v --no-cache-dir ./
+cd $ROOT
+rm -rf apex
diff --git a/tests/loggers/test_base.py b/tests/loggers/test_base.py
index 6f386ed39b55c..9217e1c27de9c 100644
--- a/tests/loggers/test_base.py
+++ b/tests/loggers/test_base.py
@@ -1,10 +1,10 @@
 import pickle
 from unittest.mock import MagicMock

-import tests.models.utils as tutils
+import tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import LightningLoggerBase, rank_zero_only, LoggerCollection
-from tests.models import LightningTestModel
+from tests.base import LightningTestModel


 def test_logger_collection():
@@ -57,7 +57,7 @@ def version(self):

 def test_custom_logger(tmpdir):
-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = LightningTestModel(hparams)
     logger = CustomLogger()
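For context on the new `tests/collect_env_details.py` added above: `nice_print` renders the nested details dict as tab-indented `key: value` lines, padding each key to `KEY_PADDING` characters and expanding sets/lists one item per line. A minimal standalone sketch of that formatting logic, using hypothetical report values (it repeats the function rather than importing the script):

```python
LEVEL_OFFSET = '\t'
KEY_PADDING = 20

def nice_print(details, level=0):
    # Same recursive rendering as in tests/collect_env_details.py:
    # dicts recurse one indent level deeper, iterables print one item
    # per line, scalars print as a fixed-width key plus the value.
    lines = []
    for k in sorted(details):
        key = f'{k}:'
        if isinstance(details[k], dict):
            lines += [level * LEVEL_OFFSET + key]
            lines += nice_print(details[k], level + 1)
        elif isinstance(details[k], (set, list, tuple)):
            lines += [level * LEVEL_OFFSET + key]
            lines += [(level + 1) * LEVEL_OFFSET + v for v in details[k]]
        else:
            template = '{:%is} {}' % KEY_PADDING  # e.g. '{:20s} {}'
            lines += [level * LEVEL_OFFSET + template.format(key, details[k])]
    return lines

# Hypothetical report mirroring the info_system()/info_cuda() structure:
details = {'system': {'OS': 'Linux', 'python': '3.7.6'},
           'cuda': {'available': False, 'version': None}}
print('\n'.join(nice_print(details)))
# cuda:
#     available:           False
#     version:             None
# system:
#     OS:                  Linux
#     python:              3.7.6
```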
@@ -78,7 +78,7 @@ def test_custom_logger(tmpdir): def test_multiple_loggers(tmpdir): - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) logger1 = CustomLogger() @@ -137,7 +137,7 @@ def decorated(metrics, step): return decorated - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() model.validation_epoch_end = _validation_end trainer_options = dict( max_epochs=4, diff --git a/tests/loggers/test_comet.py b/tests/loggers/test_comet.py index 69f434c06e7ed..1aaf4cb7fd62f 100644 --- a/tests/loggers/test_comet.py +++ b/tests/loggers/test_comet.py @@ -5,11 +5,11 @@ import pytest import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import CometLogger from pytorch_lightning.utilities.debugging import MisconfigurationException -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_comet_logger(tmpdir, monkeypatch): @@ -22,7 +22,7 @@ def test_comet_logger(tmpdir, monkeypatch): tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) comet_dir = os.path.join(tmpdir, 'cometruns') @@ -132,7 +132,7 @@ def test_comet_pickle(tmpdir, monkeypatch): tutils.reset_seed() - # hparams = tutils.get_hparams() + # hparams = tutils.get_default_hparams() # model = LightningTestModel(hparams) comet_dir = os.path.join(tmpdir, 'cometruns') diff --git a/tests/loggers/test_mlflow.py b/tests/loggers/test_mlflow.py index 6e49a9fe45fb7..54e57c7dee2a2 100644 --- a/tests/loggers/test_mlflow.py +++ b/tests/loggers/test_mlflow.py @@ -1,17 +1,17 @@ import os import pickle -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import MLFlowLogger -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_mlflow_logger(tmpdir): """Verify that basic functionality of mlflow logger works.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) mlflow_dir = os.path.join(tmpdir, 'mlruns') diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index 5c2ab5b52029a..0e586c33fcf76 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -1,20 +1,19 @@ import pickle - from unittest.mock import patch, MagicMock import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import NeptuneLogger -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_neptune_logger(tmpdir): """Verify that basic functionality of neptune logger works.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) logger = NeptuneLogger(offline_mode=True) @@ -103,7 +102,7 @@ def test_neptune_leave_open_experiment_after_fit(tmpdir): """Verify that neptune experiment was closed after training""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) def _run_training(logger): diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py index 220cdeb59a7d7..b938be4d64b48 100644 --- a/tests/loggers/test_tensorboard.py +++ 
b/tests/loggers/test_tensorboard.py @@ -4,16 +4,16 @@ import pytest import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import TensorBoardLogger -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_tensorboard_logger(tmpdir): """Verify that basic functionality of Tensorboard logger works.""" - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) logger = TensorBoardLogger(save_dir=tmpdir, name="tensorboard_logger_test") diff --git a/tests/loggers/test_test_tube.py b/tests/loggers/test_test_tube.py index 0788e0cd26130..68ac8d93ebe58 100644 --- a/tests/loggers/test_test_tube.py +++ b/tests/loggers/test_test_tube.py @@ -1,17 +1,17 @@ import pickle -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_testtube_logger(tmpdir): """Verify that basic functionality of test tube logger works.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) assert logger.name == 'lightning_logs' @@ -32,9 +32,9 @@ def test_testtube_pickle(tmpdir): """Verify that pickling a trainer containing a test tube logger works.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) logger.log_hyperparams(hparams) logger.save() diff --git a/tests/loggers/test_trains.py b/tests/loggers/test_trains.py index 384d6be8bb080..858ac64a3ae5e 100644 --- a/tests/loggers/test_trains.py +++ b/tests/loggers/test_trains.py @@ -1,16 +1,16 @@ import pickle -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import TrainsLogger -from tests.models import LightningTestModel +from tests.base import LightningTestModel def test_trains_logger(tmpdir): """Verify that basic functionality of TRAINS logger works.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) TrainsLogger.set_bypass_mode(True) TrainsLogger.set_credentials(api_host='http://integration.trains.allegro.ai:8008', @@ -36,7 +36,7 @@ def test_trains_pickle(tmpdir): """Verify that pickling trainer with TRAINS logger works.""" tutils.reset_seed() - # hparams = tutils.get_hparams() + # hparams = tutils.get_default_hparams() # model = LightningTestModel(hparams) TrainsLogger.set_bypass_mode(True) TrainsLogger.set_credentials(api_host='http://integration.trains.allegro.ai:8008', diff --git a/tests/loggers/test_wandb.py b/tests/loggers/test_wandb.py index abb49544b1ade..8e9d6c49b7165 100644 --- a/tests/loggers/test_wandb.py +++ b/tests/loggers/test_wandb.py @@ -4,7 +4,7 @@ import pytest -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.loggers import WandbLogger diff --git a/tests/models/__init__.py b/tests/models/__init__.py index 67206a63d0fe6..e69de29bb2d1d 100644 --- a/tests/models/__init__.py +++ b/tests/models/__init__.py @@ -1,57 +0,0 @@ 
-"""Models for testing.""" - -import torch - -from .base import TestModelBase, DictHparamsModel -from .mixins import ( - LightEmptyTestStep, - LightValidationStepMixin, - LightValidationMixin, - LightValidationStepMultipleDataloadersMixin, - LightValidationMultipleDataloadersMixin, - LightTestStepMixin, - LightTestMixin, - LightTestStepMultipleDataloadersMixin, - LightTestMultipleDataloadersMixin, - LightTestFitSingleTestDataloadersMixin, - LightTestFitMultipleTestDataloadersMixin, - LightValStepFitSingleDataloaderMixin, - LightValStepFitMultipleDataloadersMixin, - LightTrainDataloader, - LightTestDataloader, - LightInfTrainDataloader, - LightInfValDataloader, - LightInfTestDataloader, - LightTestOptimizerWithSchedulingMixin, - LightTestMultipleOptimizersWithSchedulingMixin, - LightTestOptimizersWithMixedSchedulingMixin, - LightTestReduceLROnPlateauMixin -) - - -class LightningTestModel(LightTrainDataloader, - LightValidationMixin, - LightTestMixin, - TestModelBase): - """Most common test case. Validation and test dataloaders.""" - - def on_training_metrics(self, logs): - logs['some_tensor_to_test'] = torch.rand(1) - - -class LightningTestModelWithoutHyperparametersArg(LightningTestModel): - """ without hparams argument in constructor """ - - def __init__(self): - import tests.models.utils as tutils - - # the user loads the hparams in some other way - hparams = tutils.get_hparams() - super().__init__(hparams) - - -class LightningTestModelWithUnusedHyperparametersArg(LightningTestModelWithoutHyperparametersArg): - """ has hparams argument in constructor but is not used """ - - def __init__(self, hparams): - super().__init__() diff --git a/tests/test_amp.py b/tests/models/test_amp.py similarity index 89% rename from tests/test_amp.py rename to tests/models/test_amp.py index 832c7ba7a8c95..13d6ff0b5848a 100644 --- a/tests/test_amp.py +++ b/tests/models/test_amp.py @@ -2,10 +2,10 @@ import pytest -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.utilities.debugging import MisconfigurationException -from tests.models import ( +from tests.base import ( LightningTestModel, ) @@ -17,7 +17,7 @@ def test_amp_single_gpu(tmpdir): if not tutils.can_run_gpu_test(): return - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -40,7 +40,7 @@ def test_no_amp_single_gpu(tmpdir): if not tutils.can_run_gpu_test(): return - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -66,7 +66,7 @@ def test_amp_gpu_ddp(tmpdir): tutils.reset_seed() tutils.set_random_master_port() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -93,7 +93,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): tutils.set_random_master_port() os.environ['SLURM_LOCALID'] = str(0) - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -105,7 +105,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): ) # exp file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) # exp file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -136,14 +136,14 @@ def test_cpu_model_with_amp(tmpdir): trainer_options = dict( default_save_path=tmpdir, 
show_progress_bar=False, - logger=tutils.get_test_tube_logger(tmpdir), + logger=tutils.get_default_testtube_logger(tmpdir), max_epochs=1, train_percent_check=0.4, val_percent_check=0.4, precision=16 ) - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() with pytest.raises((MisconfigurationException, ModuleNotFoundError)): tutils.run_model_test(trainer_options, model, on_gpu=False) @@ -157,7 +157,7 @@ def test_amp_gpu_dp(tmpdir): if not tutils.can_run_gpu_test(): return - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, max_epochs=1, diff --git a/tests/test_cpu_models.py b/tests/models/test_cpu.py similarity index 78% rename from tests/test_cpu_models.py rename to tests/models/test_cpu.py index 38fc790430fd7..07c9968a4135f 100644 --- a/tests/test_cpu_models.py +++ b/tests/models/test_cpu.py @@ -4,16 +4,17 @@ import pytest import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ( EarlyStopping, ) -from tests.models import ( +from tests.base import ( TestModelBase, LightTrainDataloader, LightningTestModel, LightTestMixin, + LightValidationMixin ) @@ -29,12 +30,12 @@ def test_early_stopping_cpu_model(tmpdir): overfit_pct=0.20, track_grad_norm=2, show_progress_bar=True, - logger=tutils.get_test_tube_logger(tmpdir), + logger=tutils.get_default_testtube_logger(tmpdir), train_percent_check=0.1, val_percent_check=0.1, ) - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() tutils.run_model_test(trainer_options, model, on_gpu=False) # test freeze on cpu @@ -55,7 +56,7 @@ def test_lbfgs_cpu_model(tmpdir): val_percent_check=0.2, ) - model, hparams = tutils.get_model(use_test_model=True, lbfgs=True) + model, hparams = tutils.get_default_model(lbfgs=True) tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.30) @@ -73,7 +74,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir): val_percent_check=0.01, ) - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() tutils.run_model_test_no_loggers(trainer_options, model) # test freeze on cpu @@ -85,11 +86,11 @@ def test_running_test_after_fitting(tmpdir): """Verify test() on fitted model.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) # logger file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -97,7 +98,7 @@ def test_running_test_after_fitting(tmpdir): trainer_options = dict( default_save_path=tmpdir, show_progress_bar=False, - max_epochs=4, + max_epochs=8, train_percent_check=0.4, val_percent_check=0.2, test_percent_check=0.2, @@ -114,7 +115,7 @@ def test_running_test_after_fitting(tmpdir): trainer.test() # test we have good test accuracy - tutils.assert_ok_model_acc(trainer) + tutils.assert_ok_model_acc(trainer, thr=0.35) def test_running_test_without_val(tmpdir): @@ -124,11 +125,11 @@ def test_running_test_without_val(tmpdir): class CurrentTestModel(LightTrainDataloader, LightTestMixin, TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = 
tutils.get_default_testtube_logger(tmpdir, False) # logger file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -156,6 +157,55 @@ class CurrentTestModel(LightTrainDataloader, LightTestMixin, TestModelBase): tutils.assert_ok_model_acc(trainer) +def test_disabled_validation(): + """Verify that `val_percent_check=0` disables the validation loop unless `fast_dev_run=True`.""" + tutils.reset_seed() + + class CurrentModel(LightTrainDataloader, LightValidationMixin, TestModelBase): + + validation_step_invoked = False + validation_end_invoked = False + + def validation_step(self, *args, **kwargs): + self.validation_step_invoked = True + return super().validation_step(*args, **kwargs) + + def validation_end(self, *args, **kwargs): + self.validation_end_invoked = True + return super().validation_end(*args, **kwargs) + + hparams = tutils.get_default_hparams() + model = CurrentModel(hparams) + + trainer_options = dict( + show_progress_bar=False, + max_epochs=2, + train_percent_check=0.4, + val_percent_check=0.0, + fast_dev_run=False, + ) + + trainer = Trainer(**trainer_options) + result = trainer.fit(model) + + # check that val_percent_check=0 turns off validation + assert result == 1, 'training failed to complete' + assert trainer.current_epoch == 1 + assert not model.validation_step_invoked, '`validation_step` should not run when `val_percent_check=0`' + assert not model.validation_end_invoked, '`validation_end` should not run when `val_percent_check=0`' + + # check that val_percent_check has no influence when fast_dev_run is turned on + model = CurrentModel(hparams) + trainer_options.update(fast_dev_run=True) + trainer = Trainer(**trainer_options) + result = trainer.fit(model) + + assert result == 1, 'training failed to complete' + assert trainer.current_epoch == 0 + assert model.validation_step_invoked, 'did not run `validation_step` with `fast_dev_run=True`' + assert model.validation_end_invoked, 'did not run `validation_end` with `fast_dev_run=True`' + + def test_single_gpu_batch_parse(): tutils.reset_seed() @@ -204,7 +254,7 @@ def test_simple_cpu(tmpdir): """Verify continue training session on CPU.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # logger file to get meta @@ -230,13 +280,13 @@ def test_cpu_model(tmpdir): trainer_options = dict( default_save_path=tmpdir, show_progress_bar=False, - logger=tutils.get_test_tube_logger(tmpdir), + logger=tutils.get_default_testtube_logger(tmpdir), max_epochs=1, train_percent_check=0.4, val_percent_check=0.4 ) - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() tutils.run_model_test(trainer_options, model, on_gpu=False) @@ -251,14 +301,14 @@ def test_all_features_cpu_model(tmpdir): overfit_pct=0.20, track_grad_norm=2, show_progress_bar=False, - logger=tutils.get_test_tube_logger(tmpdir), + logger=tutils.get_default_testtube_logger(tmpdir), accumulate_grad_batches=2, max_epochs=1, train_percent_check=0.4, val_percent_check=0.4 ) - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() tutils.run_model_test(trainer_options, model, on_gpu=False) @@ -295,7 +345,7 @@ def training_step(self, batch, batch_idx, hiddens): y_tensor = torch.tensor(y_list, dtype=x_tensor.dtype) assert y_tensor.shape[1] == truncated_bptt_steps, "tbptt split list failed" - pred = self.forward(x_tensor.view(batch_size, truncated_bptt_steps)) + pred = self(x_tensor.view(batch_size, truncated_bptt_steps)) loss_val = 
torch.nn.functional.mse_loss( pred, y_tensor.view(batch_size, truncated_bptt_steps)) return { @@ -320,7 +370,7 @@ def train_dataloader(self): early_stop_callback=False ) - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() hparams.batch_size = batch_size hparams.in_features = truncated_bptt_steps hparams.hidden_dim = truncated_bptt_steps @@ -343,7 +393,7 @@ def test_single_gpu_model(tmpdir): warnings.warn('test_single_gpu_model cannot run.' ' Rerun on a GPU node to run this test') return - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, @@ -371,7 +421,7 @@ def training_step(self, batch, batch_idx): output /= 0 return output - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = InfLossModel(hparams) # fit model @@ -398,7 +448,7 @@ def on_after_backward(self): # simulate parameter that became nan torch.nn.init.constant_(self.c_d1.bias, math.nan) - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = NanParamModel(hparams) trainer = Trainer( diff --git a/tests/test_gpu_models.py b/tests/models/test_gpu.py similarity index 95% rename from tests/test_gpu_models.py rename to tests/models/test_gpu.py index a95e4d42ddae1..9c684ca6bfbf0 100644 --- a/tests/test_gpu_models.py +++ b/tests/models/test_gpu.py @@ -3,20 +3,16 @@ import pytest import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import ( - ModelCheckpoint, -) +from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core import memory from pytorch_lightning.trainer.distrib_parts import ( parse_gpu_ids, determine_root_gpu_device, ) from pytorch_lightning.utilities.debugging import MisconfigurationException -from tests.models import ( - LightningTestModel, -) +from tests.base import LightningTestModel PRETEND_N_OF_GPUS = 16 @@ -29,7 +25,7 @@ def test_multi_gpu_model_ddp2(tmpdir): tutils.reset_seed() tutils.set_random_master_port() - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, show_progress_bar=True, @@ -52,7 +48,7 @@ def test_multi_gpu_model_ddp(tmpdir): tutils.reset_seed() tutils.set_random_master_port() - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, show_progress_bar=False, @@ -74,7 +70,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir): tutils.reset_seed() tutils.set_random_master_port() - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict(default_save_path=tmpdir, show_progress_bar=False, max_epochs=1, @@ -95,7 +91,7 @@ def test_optimizer_return_options(): tutils.reset_seed() trainer = Trainer() - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() # single optimizer opt_a = torch.optim.Adam(model.parameters(), lr=0.002) @@ -130,11 +126,11 @@ def test_cpu_slurm_save_load(tmpdir): """Verify model save/load/checkpoint on CPU.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) version = logger.version trainer_options = dict( @@ -173,7 +169,7 @@ def test_cpu_slurm_save_load(tmpdir): 
assert os.path.exists(saved_filepath) # new logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False, version=version) + logger = tutils.get_default_testtube_logger(tmpdir, False, version=version) trainer_options = dict( max_epochs=1, @@ -206,7 +202,7 @@ def test_multi_gpu_none_backend(tmpdir): if not tutils.can_run_gpu_test(): return - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, show_progress_bar=False, @@ -227,7 +223,7 @@ def test_multi_gpu_model_dp(tmpdir): if not tutils.can_run_gpu_test(): return - model, hparams = tutils.get_model() + model, hparams = tutils.get_default_model() trainer_options = dict( default_save_path=tmpdir, show_progress_bar=False, diff --git a/tests/test_restore_models.py b/tests/models/test_restore.py similarity index 93% rename from tests/test_restore_models.py rename to tests/models/test_restore.py index cf3a6773ca142..d0088c268ed44 100644 --- a/tests/test_restore_models.py +++ b/tests/models/test_restore.py @@ -5,11 +5,11 @@ import pytest import torch -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.utilities.debugging import MisconfigurationException -from tests.models import ( +from tests.base import ( LightningTestModel, LightningTestModelWithoutHyperparametersArg, LightningTestModelWithUnusedHyperparametersArg @@ -24,11 +24,11 @@ def test_running_test_pretrained_model_ddp(tmpdir): tutils.reset_seed() tutils.set_random_master_port() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # exp file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) # exp file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -72,11 +72,11 @@ def test_running_test_pretrained_model(tmpdir): """Verify test() on pretrained model.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) # logger file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -111,7 +111,7 @@ def test_load_model_from_checkpoint(tmpdir): """Verify test() on pretrained model.""" tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -158,11 +158,11 @@ def test_running_test_pretrained_model_dp(tmpdir): if not tutils.can_run_gpu_test(): return - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) # logger file to get weights checkpoint = tutils.init_checkpoint_callback(logger) @@ -202,7 +202,7 @@ def test_dp_resume(tmpdir): tutils.reset_seed() - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = LightningTestModel(hparams) trainer_options = dict( @@ -213,7 +213,7 @@ def test_dp_resume(tmpdir): ) # get logger - logger = tutils.get_test_tube_logger(tmpdir, debug=False) + logger = tutils.get_default_testtube_logger(tmpdir, debug=False) # exp file to get weights # 
logger file to get weights
@@ -241,7 +241,7 @@ def test_dp_resume(tmpdir):
     trainer.hpc_save(tmpdir, logger)

     # init new trainer
-    new_logger = tutils.get_test_tube_logger(tmpdir, version=logger.version)
+    new_logger = tutils.get_default_testtube_logger(tmpdir, version=logger.version)
     trainer_options['logger'] = new_logger
     trainer_options['checkpoint_callback'] = ModelCheckpoint(tmpdir)
     trainer_options['train_percent_check'] = 0.5
@@ -277,11 +277,11 @@ def test_model_saving_loading(tmpdir):
     """Tests use case where trainer saves the model, and user loads it from tags independently."""
     tutils.reset_seed()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = LightningTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_test_tube_logger(tmpdir, False)
+    logger = tutils.get_default_testtube_logger(tmpdir, False)

     trainer_options = dict(
         max_epochs=1,
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 8267651041788..b82220b764766 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,7 @@
+# install all extra dependencies for full package testing
+-r ../requirements-extra.txt
+
+# extended list of dependencies for development, linting and testing
 torchvision>=0.4.0, < 0.5  # the 0.5. has some issues with torch JIT
 tox
 coverage
@@ -8,5 +12,4 @@ pytest-flake8
 flake8
 check-manifest
 twine==1.13.0
-pillow<7.0.0
--r ../requirements-extra.txt
\ No newline at end of file
+pillow<7.0.0
\ No newline at end of file
diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py
index a79eb7451305f..ddaae354168b3 100644
--- a/tests/test_deprecated.py
+++ b/tests/test_deprecated.py
@@ -2,8 +2,8 @@
 from pytorch_lightning import Trainer

-import tests.models.utils as tutils
-from tests.models import TestModelBase, LightTrainDataloader, LightEmptyTestStep
+import tests.base.utils as tutils
+from tests.base import TestModelBase, LightTrainDataloader, LightEmptyTestStep


 def test_tbd_remove_in_v0_8_0_module_imports():
@@ -85,7 +85,7 @@ def test_end(self, outputs):

 def test_tbd_remove_in_v1_0_0_model_hooks():
-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = ModelVer0_6(hparams)
diff --git a/tests/trainer/test_callbacks.py b/tests/trainer/test_callbacks.py
index 55a84633c87ca..377dce76662e4 100644
--- a/tests/trainer/test_callbacks.py
+++ b/tests/trainer/test_callbacks.py
@@ -1,10 +1,7 @@
-import os
-
-import tests.models.utils as tutils
+import tests.base.utils as tutils
 from pytorch_lightning import Callback
 from pytorch_lightning import Trainer, LightningModule
-from pytorch_lightning.callbacks import ModelCheckpoint
-from tests.models import (
+from tests.base import (
     TestModelBase,
     LightTrainDataloader,
     LightValidationMixin,
@@ -23,7 +20,7 @@ class CurrentTestModel(
     ):
         pass

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = CurrentTestModel(hparams)

     def _check_args(trainer, pl_module):
diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 40670dafdf4bf..6f0ee15aef0ec 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -1,9 +1,9 @@
 import pytest

-import tests.models.utils as tutils
+import tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.utilities.debugging import MisconfigurationException
-from tests.models import (
+from tests.base import (
     TestModelBase,
     LightningTestModel,
     LightEmptyTestStep,
@@ -29,7 +29,7 @@ class CurrentTestModel(
     ):
         pass

-    hparams =
tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # percent check < 0 @@ -104,7 +104,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -143,7 +143,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -178,7 +178,7 @@ def test_train_dataloaders_passed_to_fit(tmpdir): class CurrentTestModel(LightTrainDataloader, TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() # logger file to get meta trainer_options = dict( @@ -208,7 +208,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() # logger file to get meta trainer_options = dict( @@ -243,7 +243,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() # logger file to get meta trainer_options = dict( @@ -282,7 +282,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() # logger file to get meta trainer_options = dict( @@ -321,7 +321,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -360,7 +360,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # fit model @@ -394,7 +394,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # fit model @@ -428,7 +428,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # fit model diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py index 3ea0e3ff2aab7..4de3580eba22f 100644 --- a/tests/trainer/test_optimizers.py +++ b/tests/trainer/test_optimizers.py @@ -1,13 +1,7 @@ -import math -import os - -import pytest -import torch - -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer -from tests.models import ( +from tests.base import ( TestModelBase, LightTrainDataloader, LightValidationStepMixin, @@ -29,7 +23,7 @@ class CurrentTestModel( TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -68,7 +62,7 @@ class CurrentTestModel( TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -111,7 +105,7 @@ class CurrentTestModel( TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta @@ -160,7 +154,7 @@ class CurrentTestModel( TestModelBase): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # logger file to get meta diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 47a1eb3d1bcfc..89d849d7125e3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -6,7 +6,7 @@ import pytest import torch -import tests.models.utils as tutils +import 
tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import (
     EarlyStopping,
@@ -15,7 +15,7 @@
 from pytorch_lightning.core.lightning import load_hparams_from_tags_csv
 from pytorch_lightning.trainer.logging import TrainerLoggingMixin
 from pytorch_lightning.utilities.debugging import MisconfigurationException
-from tests.models import (
+from tests.base import (
     TestModelBase,
     DictHparamsModel,
     LightningTestModel,
@@ -53,7 +53,7 @@ def test_no_val_module(tmpdir):
     """Tests use case where trainer saves the model, and user loads it from tags independently."""
     tutils.reset_seed()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()

     class CurrentTestModel(LightTrainDataloader, TestModelBase):
         pass
@@ -61,7 +61,7 @@ class CurrentTestModel(LightTrainDataloader, TestModelBase):
     model = CurrentTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_test_tube_logger(tmpdir, False)
+    logger = tutils.get_default_testtube_logger(tmpdir, False)

     trainer_options = dict(
         max_epochs=1,
@@ -97,11 +97,11 @@ def test_no_val_end_module(tmpdir):
     class CurrentTestModel(LightTrainDataloader, LightValidationStepMixin, TestModelBase):
         pass

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = CurrentTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_test_tube_logger(tmpdir, False)
+    logger = tutils.get_default_testtube_logger(tmpdir, False)

     trainer_options = dict(
         max_epochs=1,
@@ -189,7 +189,7 @@ def _optimizer_step(self, epoch, batch_idx, optimizer,
         # clear gradients
         optimizer.zero_grad()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = LightningTestModel(hparams)
     schedule = {1: 2, 3: 4}

@@ -209,10 +209,10 @@ def _optimizer_step(self, epoch, batch_idx, optimizer,
 def test_loading_meta_tags(tmpdir):
     tutils.reset_seed()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()

     # save tags
-    logger = tutils.get_test_tube_logger(tmpdir, False)
+    logger = tutils.get_default_testtube_logger(tmpdir, False)
     logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
     logger.log_hyperparams(hparams)
     logger.save()
@@ -254,7 +254,7 @@ def test_model_checkpoint_options(tmpdir):
     def mock_save_function(filepath):
         open(filepath, 'a').close()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     _ = LightningTestModel(hparams)

     # simulated losses
@@ -355,7 +355,7 @@ def mock_save_function(filepath):
     os.mkdir(save_dir)

     # -----------------
-    # CASE K=4 (save all 4 models)
+    # CASE K=4 (save all 4 checkpoints)
     # multiple checkpoints within same epoch

     checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=4, verbose=1)
@@ -401,7 +401,7 @@ def mock_save_function(filepath):
 def test_model_freeze_unfreeze():
     tutils.reset_seed()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()
     model = LightningTestModel(hparams)

     model.freeze()
@@ -414,7 +414,7 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir):

     tutils.reset_seed()

-    hparams = tutils.get_hparams()
+    hparams = tutils.get_default_hparams()

     def _new_model():
         # Create a model that tracks epochs and batches seen
@@ -474,7 +474,7 @@ def increment_batch(self, _):
 def _init_steps_model():
     """private method for initializing a model with 5% train epochs"""
     tutils.reset_seed()
-    model, _ = tutils.get_model()
+    model, _ = tutils.get_default_model()

     # define train epoch to 5% of data
     train_percent = 0.05
@@ -530,7 +530,7 @@ def test_trainer_min_steps_and_epochs(tmpdir):
trainer_options.update(dict( default_save_path=tmpdir, early_stop_callback=EarlyStopping(monitor='val_loss', min_delta=1.0), - val_check_interval=20, + val_check_interval=2, min_epochs=1, max_epochs=10 )) @@ -571,7 +571,7 @@ class CurrentTestModel( ): pass - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() model = CurrentTestModel(hparams) # verify torch.backends.cudnn.benchmark is not turned on @@ -596,7 +596,7 @@ class CurrentTestModel( def test_testpass_overrides(tmpdir): - hparams = tutils.get_hparams() + hparams = tutils.get_default_hparams() class LocalModel(LightTrainDataloader, TestModelBase): pass diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py index 294ac9bcfd222..92bbd647b0aab 100644 --- a/tests/trainer/test_trainer_cli.py +++ b/tests/trainer/test_trainer_cli.py @@ -4,7 +4,7 @@ import pytest -import tests.models.utils as tutils +import tests.base.utils as tutils from pytorch_lightning import Trainer @@ -15,7 +15,7 @@ def test_default_args(tmpdir): tutils.reset_seed() # logger file to get meta - logger = tutils.get_test_tube_logger(tmpdir, False) + logger = tutils.get_default_testtube_logger(tmpdir, False) parser = ArgumentParser(add_help=False) args = parser.parse_args() diff --git a/tox.ini b/tox.ini deleted file mode 100644 index f181bc64dd14d..0000000000000 --- a/tox.ini +++ /dev/null @@ -1,54 +0,0 @@ -# this file is *not* meant to cover or endorse the use of tox or pytest or testing in general, -# -# It's meant to show the use of: -# -# - check-manifest -# confirm items checked into vcs are in your segdist -# - python setup.py check -# confirm required package meta-data in setup.py -# - readme_renderer (when using a ReStructuredText README) -# confirms your long_description will render correctly on PyPI. -# -# and also to help confirm pull requests to this project. - -[tox] -envlist = py{35,36,37,38} - -# DROP, it is duplication of setup.cfg -# [pytest] -# log_cli = 0 -# log_cli_level = CRITICAL -# log_cli_format = %(message)s -# log_file = pytest.log -# log_file_level = DEBUG -# log_file_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) -# log_file_date_format=%Y-%m-%d %H:%M:%S - -[testenv] -basepython = - py35: python3.5 - py36: python3.6 - py37: python3.7 - py38: python3.8 -deps = - # this is derived requirements - -r requirements.txt - -r ./tests/requirements.txt -commands = - pip list - check-manifest - python setup.py check --metadata --strict - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules - coverage report - python setup.py sdist - twine check dist/* - -# DROP, it is duplication of setup.cfg -# [flake8] -# exclude = .tox,*.egg,build,temp,examples/* -# select = E,W,F -# doctests = True -# verbose = 2 -# https://pep8.readthedocs.io/en/latest/intro.html#error-codes -# format = pylint -# max-line-length = 100
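One usage note on the `save_best_only` → `save_top_k` migration applied in `pytorch_lightning/trainer/training_io.py` above: `save_top_k=1` reproduces the old best-only behaviour, larger values keep the k best checkpoints by the monitored metric, and `save_top_k=-1` keeps every checkpoint. A minimal sketch of the new callback setup (argument values taken from the hunk above; the trainer wiring around it is illustrative, not part of this diff):

```python
import os

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

# Before this change: ModelCheckpoint(filepath=os.getcwd(), save_best_only=True, ...)
# After: save_top_k=1 keeps only the single best checkpoint by the monitored metric.
checkpoint_callback = ModelCheckpoint(
    filepath=os.getcwd(),
    save_top_k=1,       # 1 ~ old save_best_only=True; -1 would keep every checkpoint
    verbose=True,
    monitor='val_loss',
    mode='min',
)

trainer = Trainer(checkpoint_callback=checkpoint_callback)
# trainer.fit(model)  # `model` is a user-defined LightningModule, not shown here
```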