From 2d7a343c63978f63f14c9bbab6fe4224941d4d25 Mon Sep 17 00:00:00 2001 From: Julian Frimmel Date: Fri, 17 Jan 2025 22:10:24 +0100 Subject: [PATCH] Fix `tarfile`-warnings due to PEP 706 This commit addresses some warnings issued by Python 3.12+ with regard to the behavior of `tarfile.extractall()`. Currently that method gets called in two places: the URL and GitHub providers. At the moment, both essentially trust the archive not to be malicious, even though tarfiles can be crafted to overwrite other parts of the system and can contain strange links or even device files. [PEP 706] tries to fix this potential security vulnerability in a large amount of code written today by changing the default behavior of the aforementioned method in Python 3.14. Until then, a warning is issued asking callers to pass a suitable `filter` argument. This commit uses such a filter if the Python version running the code supports it. The implementation is based on conda/conda-package-streaming#96, which is a pull request fixing the same thing. The solution of adding the new filter argument only if supported is elegant and backwards-compatible. The `data` filter was chosen, since the archives this project deals with are typically exactly that: an archive of plain old directories with regular files in them. Applying this commit reduces the number of warnings reported by the test suite from five down to zero. The previous output was: ```log =================================================== warnings summary =================================================== tests/test_coremanager.py::test_export tests/test_coremanager.py::test_export tests/test_coremanager.py::test_export tests/test_provider.py::test_github_provider /home/jfrimmel/git/fusesoc/.tox/py3/lib/python3.13/site-packages/fusesoc/provider/github.py:44: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior. 
t.extractall(cache_root) tests/test_provider.py::test_url_provider /home/jfrimmel/git/fusesoc/.tox/py3/lib/python3.13/site-packages/fusesoc/provider/url.py:47: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior. t.extractall(local_dir) -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ``` It's a bit unfortunate that the same change needed to be performed in two places, but I wanted to keep the diff as small as possible and did not want to introduce a new module or similar. [PEP 706]: https://peps.python.org/pep-0706/ --- fusesoc/provider/github.py | 9 ++++++++- fusesoc/provider/url.py | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fusesoc/provider/github.py b/fusesoc/provider/github.py index 22cd41cc..fc08fa24 100644 --- a/fusesoc/provider/github.py +++ b/fusesoc/provider/github.py @@ -9,6 +9,8 @@ from fusesoc.provider.provider import Provider +_HAS_TAR_FILTER = hasattr(tarfile, "tar_filter") # Requires Python 3.12 + logger = logging.getLogger(__name__) if sys.version_info[0] >= 3: @@ -41,5 +43,10 @@ def _checkout(self, local_dir): # Ugly hack to get the first part of the directory name of the extracted files tmp = t.getnames()[0] - t.extractall(cache_root) + + extraction_arguments = {"path": cache_root} + if _HAS_TAR_FILTER: + extraction_arguments["filter"] = "data" + t.extractall(**extraction_arguments) + os.rename(os.path.join(cache_root, tmp), os.path.join(cache_root, core)) diff --git a/fusesoc/provider/url.py b/fusesoc/provider/url.py index 023fe126..8a7b13fc 100644 --- a/fusesoc/provider/url.py +++ b/fusesoc/provider/url.py @@ -21,6 +21,8 @@ from fusesoc.provider.provider import Provider +_HAS_TAR_FILTER = hasattr(tarfile, "tar_filter") # Requires Python 3.12 + class Url(Provider): def _checkout(self, local_dir): @@ -44,7 +46,10 @@ def _checkout(self, local_dir): filetype = 
self.config.get("filetype") if filetype == "tar": t = tarfile.open(filename) - t.extractall(local_dir) + extraction_arguments = {"path": local_dir} + if _HAS_TAR_FILTER: + extraction_arguments["filter"] = "data" + t.extractall(**extraction_arguments) elif filetype == "zip": with zipfile.ZipFile(filename, "r") as z: z.extractall(local_dir)