From ea6b1400e22e4a07f1201c0c1910e1dcc8506832 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:15:55 +0100 Subject: [PATCH 01/14] :hammer: updates to please pypi --- setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index fe0f38b..717a70e 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_package_description(): def get_requirements(): """Returns all requirements for this package.""" with open("requirements.txt") as f: - requirements = f.read().splitlines() + requirements = f.readlines() return requirements @@ -33,8 +33,9 @@ def get_requirements(): long_description=get_package_description(), long_description_content_type="text/markdown", url="https://github.com/pszemraj/confectionary", - packages=setuptools.find_packages(), - install_requires=get_requirements(), + # packages=setuptools.find_packages(), + package_dir={"": "confectionary"}, + packages=get_requirements(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache 2.0 License", @@ -46,4 +47,4 @@ def get_requirements(): "enabled": True, }, setup_requires=["setuptools-git-versioning"], -) +) \ No newline at end of file From 813883875583dc5dcc8b7984589210d8281ff4c3 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:25:15 +0100 Subject: [PATCH 02/14] :hammer: revert to traditional means --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 717a70e..29ceeae 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ def get_requirements(): url="https://github.com/pszemraj/confectionary", # packages=setuptools.find_packages(), package_dir={"": "confectionary"}, - packages=get_requirements(), + packages=setuptools.find_packages(where="confectionary"), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache 2.0 License", From ad972877ba2d4359c08a572bbd65cff8df317941 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:30:50 +0100 Subject: [PATCH 03/14] :hammer: remove buggy import --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 29ceeae..f689ace 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,6 @@ import glob import setuptools -import confectionary - def get_scripts_from_bin(): """Get all local scripts from bin so they are included in the package.""" From e3e876587843bd2db56df415d8720381fd699d23 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:31:15 +0100 Subject: [PATCH 04/14] :art: format to black --- confectionary/pdf.py | 4 ++-- confectionary/utils.py | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/confectionary/pdf.py b/confectionary/pdf.py index 5c211fd..4e4e9b7 100644 --- a/confectionary/pdf.py +++ b/confectionary/pdf.py @@ -319,9 +319,9 @@ def figure_title(self, title: str): figure_title - add a figure title to the PDF """ self.set_font("helvetica", "B", 14) - th = self.font_size # Text height + th = self.font_size # Text height self.multi_cell(w=0, h=th, txt=title, border="B", ln=1, align="L", fill=False) - self.ln() # Line break + self.ln() # Line break def write_big_title(self, a_title: str, font_size=24, font_style="B"): """ diff --git a/confectionary/utils.py b/confectionary/utils.py index f087f50..d9e17db 100644 --- a/confectionary/utils.py +++ b/confectionary/utils.py @@ -60,7 +60,7 @@ def fix_punct_spaces(string: str): string = string.replace("_ -", "-") string = string.replace(" _ ", "-") string = string.replace("_ ", "-") - string = string.strip('_') + string = string.strip("_") return string.strip() diff --git a/setup.py b/setup.py index f689ace..ec5050c 100644 --- a/setup.py +++ b/setup.py @@ -45,4 +45,4 @@ def get_requirements(): "enabled": True, }, setup_requires=["setuptools-git-versioning"], -) \ No newline at end of file +) From 9d49d8da163dde9f5e7f2187ce8d39482e132a5e Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:40:58 +0100 Subject: [PATCH 05/14] :hammer: load README handling errors --- setup.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index ec5050c..303ce49 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import glob import setuptools +from pathlib import Path def get_scripts_from_bin(): @@ -9,10 +10,18 @@ def get_scripts_from_bin(): def get_package_description(): """Returns a description of this package from the markdown files.""" - with open("README.md", "r") as stream: - readme: str = stream.read() - with open("HISTORY.md", "r") as stream: - history: str = stream.read() + _readme = Path("README.md") + _history = Path("HISTORY.md") + if _readme.exists() and _history.exists(): + with open(_readme.resolve(), "r", encoding="utf-8", errors="ignore") as f: + readme = f.read() + else: + readme = "README" + if _history.exists(): + with open(_history.resolve(), "r", encoding="utf-8", errors="ignore") as f: + history = f.read() + else: + history = "No history yet." return f"{readme}\n\n{history}" From 9c9e68fd89d1d0257dced28613d84b52ee651dba Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:41:15 +0100 Subject: [PATCH 06/14] :hammer: init content --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index e69de29..8d32e8e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -0,0 +1 @@ +# History \ No newline at end of file From 8bd69ab92fd814b1f0b197dcf0d8fcc80b855139 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 20:51:31 +0100 Subject: [PATCH 07/14] :hammer: fix classifiers :pencil2: --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 303ce49..69e8a6f 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,11 @@ def get_requirements(): packages=setuptools.find_packages(where="confectionary"), classifiers=[ "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache 2.0 License", + "License :: OSI Approved :: Apache Software License", + "Development Status :: 3 - Alpha", "Operating System :: OS Independent", + "Natural Language :: English", + "Topic :: Text Processing", ], scripts=get_scripts_from_bin(), python_requires=">=3.7", From dc04d9320e0c3af8a791510fd62dd178cf5f682e Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:10:58 +0100 Subject: [PATCH 08/14] :memo: update with pypi details --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2bafbf7..5067cc1 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,14 @@ PDF Confectionary was originally designed to be used as a command line tool, but ### Requirements - Requirements are listed in the requirements.txt file. -- the primary requirements are: FPDF2, textsplit, and cleantext. +- the primary requirements are: [FPDF2](https://pyfpdf.github.io/fpdf2/index.html), [textsplit](https://github.com/chschock/textsplit), and [clean-text](https://github.com/jfilter/clean-text). ### Package Installation +The package can be installed using pip: + + pip install confectionary + To install as a python package without pip, run: 1. `git clone ` @@ -82,7 +86,7 @@ PDF file saved to C:\Users\peter\code-dev-22\misc-repos\text2pdf\example\outputs ## TODO list - [x] convert the text2pdf.py script to a module/function -- [ ] publish to PyPI *IN-PROGRESS* +- [x] publish to PyPI *IN-PROGRESS* - [ ] improve TOC calculation beyond a simple title threshold - [ ] add alternate, smaller, word2vec models for splitting paragraphs - [ ] Add a basic notebook demo From 47543e157c86e011e711d9dd7ee66e390c6d3510 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:11:16 +0100 Subject: [PATCH 09/14] :hammer: update README parsing --- setup.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 69e8a6f..0e19dea 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,6 @@ from pathlib import Path -def get_scripts_from_bin(): - """Get all local scripts from bin so they are included in the package.""" - return glob.glob("bin/*") - def get_package_description(): """Returns a description of this package from the markdown files.""" @@ -24,6 +20,19 @@ def get_package_description(): history = "No history yet." return f"{readme}\n\n{history}" +try: + with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() +except FileNotFoundError as e: + print(f"could not read README.md: {e}") + long_description = get_package_description() + + +def get_scripts_from_bin(): + """Get all local scripts from bin so they are included in the package.""" + return glob.glob("bin/*") + + def get_requirements(): """Returns all requirements for this package.""" @@ -37,7 +46,7 @@ def get_requirements(): author="Peter Szemraj, Jonathan Lehner", author_email="szemraj.dev@gmail.com", description="A tool to quickly create sweet PDF files from text files.", - long_description=get_package_description(), + long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/pszemraj/confectionary", # packages=setuptools.find_packages(), From d103ac132a77996079ff2f85908e352765497917 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:25:12 +0100 Subject: [PATCH 10/14] :hammer: add requirements back --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 0e19dea..12db706 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def get_requirements(): # packages=setuptools.find_packages(), package_dir={"": "confectionary"}, packages=setuptools.find_packages(where="confectionary"), + requires=get_requirements(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", From 114693f4cb01f25242b90d04b0e0ae4cfbf6bee7 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:34:21 +0100 Subject: [PATCH 11/14] :wrench: reorder deps --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c698896..c21c9db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -clean-text unidecode +clean-text fpdf2 gensim natsort From 1fb6f76c422ced073e5a8dacf73790bcef445a7a Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:34:36 +0100 Subject: [PATCH 12/14] :hammer: update reqs kw --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 12db706..b77ed14 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ def get_requirements(): # packages=setuptools.find_packages(), package_dir={"": "confectionary"}, packages=setuptools.find_packages(where="confectionary"), - requires=get_requirements(), + install_requires=get_requirements(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", From 6ec30dc794f63ca454e14e8d2da348ab738d93c4 Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:40:45 +0100 Subject: [PATCH 13/14] :hammer: move function calls outside setup --- setup.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index b77ed14..994f5a7 100644 --- a/setup.py +++ b/setup.py @@ -20,14 +20,6 @@ def get_package_description(): history = "No history yet." return f"{readme}\n\n{history}" -try: - with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() -except FileNotFoundError as e: - print(f"could not read README.md: {e}") - long_description = get_package_description() - - def get_scripts_from_bin(): """Get all local scripts from bin so they are included in the package.""" return glob.glob("bin/*") @@ -40,6 +32,16 @@ def get_requirements(): requirements = f.readlines() return requirements +try: + with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() +except FileNotFoundError as e: + print(f"could not read README.md: {e}") + long_description = get_package_description() + +requirements = get_requirements() +scripts = get_scripts_from_bin() + setuptools.setup( name="confectionary", @@ -52,7 +54,7 @@ def get_requirements(): # packages=setuptools.find_packages(), package_dir={"": "confectionary"}, packages=setuptools.find_packages(where="confectionary"), - install_requires=get_requirements(), + install_requires=requirements, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", @@ -61,7 +63,7 @@ def get_requirements(): "Natural Language :: English", "Topic :: Text Processing", ], - scripts=get_scripts_from_bin(), + scripts=scripts, python_requires=">=3.7", setuptools_git_versioning={ "enabled": True, From d2167c83cc6bc0a7c85b86b28ba608a60355909b Mon Sep 17 00:00:00 2001 From: Peter Szemraj Date: Fri, 18 Feb 2022 21:41:17 +0100 Subject: [PATCH 14/14] :art: reformat --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 994f5a7..8d0084c 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,6 @@ from pathlib import Path - def get_package_description(): """Returns a description of this package from the markdown files.""" _readme = Path("README.md") @@ -20,18 +19,19 @@ def get_package_description(): history = "No history yet." return f"{readme}\n\n{history}" + def get_scripts_from_bin(): """Get all local scripts from bin so they are included in the package.""" return glob.glob("bin/*") - def get_requirements(): """Returns all requirements for this package.""" with open("requirements.txt") as f: requirements = f.readlines() return requirements + try: with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read()