From 388db5a1a0e10ad4ae6d622d270536a90fd6ee6f Mon Sep 17 00:00:00 2001 From: Shivendra Date: Tue, 3 Sep 2024 23:36:09 +0530 Subject: [PATCH 1/2] deploying as a package --- .gitignore | 3 +++ README.md | 28 ++++++++++++++-------------- requirements.txt | 10 ++-------- run.py/run_britannica.py | 4 ++-- run.py/run_freesound.py | 2 +- run.py/run_pexels.py | 4 ++-- run.py/run_transcripts.py | 4 ++-- run.py/run_unsplash.py | 4 ++-- run.py/run_wiki.py | 4 ++-- setup.py | 34 ++++++++++++++++++++++++++++++++++ 10 files changed, 64 insertions(+), 33 deletions(-) create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index b86eefa..8579bf7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,12 @@ __pycache__/ *.py[cod] *.exe +*.pypirc build .vscode +*.egg-info/ + # extras *.env Datasets diff --git a/README.md b/README.md index 1ad4384..8eda19a 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ This library contains some topics, keywords, search queries & channel ids which #### Channel Ids ```python -from graze.queries import Queries +from webgraze.queries import Queries queries = Queries(category="channel") ``` @@ -54,7 +54,7 @@ queries = Queries(category="channel") #### Search Queries ```python -from graze.queries import Queries +from webgraze.queries import Queries queries = Queries(category="search") ``` @@ -62,7 +62,7 @@ queries = Queries(category="search") #### Image Topics ```python -from graze.queries import Queries +from webgraze.queries import Queries queries = Queries(category="channel") ``` @@ -96,8 +96,8 @@ os.chdir(current_directory) api_key = os.getenv('yt_key') -from graze import Youtube -from graze.queries import Queries +from webgraze import Youtube +from webgraze.queries import Queries queries = Queries(category="channel") @@ -112,8 +112,8 @@ The Wikipedia scraper generates target URLs from provided queries, fetches the c #### Running the Scraper ```python -from graze import Wikipedia -from graze.queries import Queries +from webgraze import Wikipedia +from webgraze.queries import Queries queries = Queries(category="search") wiki = Wikipedia(filepath='../data.txt', metrics=True) @@ -134,8 +134,8 @@ The Unsplash Image scraper fetches images based on given topics & saves them in #### Running the Scraper ```python -from graze import Unsplash -from graze.queries import Queries +from webgraze import Unsplash +from webgraze.queries import Queries topics = Queries("images") @@ -159,8 +159,8 @@ The Britannica scraper generates target URLs from provided queries, fetches the #### Running the scraper ```python -from graze import Britannica -from graze.queries import Queries +from webgraze import Britannica +from webgraze.queries import Queries queries = Queries(category="search") scraper = Britannica(filepath='../data.txt', metrics=True) @@ -183,7 +183,7 @@ load_dotenv() API_KEY = os.getenv("freesound_key") -from graze import Freesound +from webgraze import Freesound sound = Freesound(api_key=API_KEY, download_dir="audios", metrics=True) sound(topics=["clicks", "background", "nature"]) @@ -220,8 +220,8 @@ Scrapes & downloads pictures from [pexels.com](https://www.pexels.com/) & saves #### Running the scraper ```python -from graze import Pexels -from graze.queries import Queries +from webgraze import Pexels +from webgraze.queries import Queries queries = Queries("images") scraper = Pexels(directory="./images", metrics=True) diff --git a/requirements.txt b/requirements.txt index 96e1f5c..efa01c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,7 @@ bs4 requests tqdm -timeit -json -re -googleapiclient +google-api-python-client youtube_transcript_api -logging -typing selenium -webdriver-manager -base64 \ No newline at end of file +webdriver-manager \ No newline at end of file diff --git a/run.py/run_britannica.py b/run.py/run_britannica.py index bd16b8c..06e7694 100644 --- a/run.py/run_britannica.py +++ b/run.py/run_britannica.py @@ -2,8 +2,8 @@ current_directory = os.path.dirname(os.path.abspath(__file__)) os.chdir(current_directory) -from graze import Britannica -from graze.queries import Queries +from webgraze import Britannica +from webgraze.queries import Queries queries = Queries(category="search") wiki = Britannica(filepath='../data.txt', metrics=True) diff --git a/run.py/run_freesound.py b/run.py/run_freesound.py index b9bb84b..7cffca9 100644 --- a/run.py/run_freesound.py +++ b/run.py/run_freesound.py @@ -6,7 +6,7 @@ API_KEY = os.getenv("freesound_key") -from graze import Freesound +from webgraze import Freesound sound = Freesound(api_key=API_KEY, download_dir="audios", metrics=True) sound(topics=["clicks", "background", "nature"]) \ No newline at end of file diff --git a/run.py/run_pexels.py b/run.py/run_pexels.py index 57950e5..101521c 100644 --- a/run.py/run_pexels.py +++ b/run.py/run_pexels.py @@ -1,5 +1,5 @@ -from graze import Pexels -from graze.queries import Queries +from webgraze import Pexels +from webgraze.queries import Queries queries = Queries("images") scraper = Pexels(directory="./images", metrics=True) diff --git a/run.py/run_transcripts.py b/run.py/run_transcripts.py index bef766a..25407f6 100644 --- a/run.py/run_transcripts.py +++ b/run.py/run_transcripts.py @@ -6,8 +6,8 @@ api_key = os.getenv('yt_key') -from graze import Youtube -from graze.queries import Queries +from webgraze import Youtube +from webgraze.queries import Queries queries = Queries(category="channel") diff --git a/run.py/run_unsplash.py b/run.py/run_unsplash.py index 09c801e..fe530f5 100644 --- a/run.py/run_unsplash.py +++ b/run.py/run_unsplash.py @@ -2,8 +2,8 @@ current_directory = os.path.dirname(os.path.abspath(__file__)) os.chdir(current_directory) -from graze import Unsplash -from graze.queries import Queries +from webgraze import Unsplash +from webgraze.queries import Queries topics = Queries("images") diff --git a/run.py/run_wiki.py b/run.py/run_wiki.py index 30f5eaf..6410a48 100644 --- a/run.py/run_wiki.py +++ b/run.py/run_wiki.py @@ -1,5 +1,5 @@ -from graze import Wikipedia -from graze.queries import Queries +from webgraze import Wikipedia +from webgraze.queries import Queries queries = Queries(category="search") wiki = Wikipedia(filepath='../data.txt', metrics=True) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ca6d7e4 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup, find_packages +import codecs +import os + +current_dir = os.path.abspath(os.path.dirname(__file__)) + +with codecs.open(os.path.join(current_dir, "README.md"), encoding="utf-8") as file: + long_description = "\n" + file.read() + +with open("requirements.txt", encoding="utf-8") as f: + required = f.read().splitlines() + +VERSION = '1.1.2' +DESCRIPTION = 'WebScraping library that scrapes & gathers data from multiple sources on the internet' + +setup( + name="webgraze", + version=VERSION, + author="shivendra", + author_email="", + description=DESCRIPTION, + long_description=long_description, + long_description_content_type="text/markdown", + license="MIT", + packages=find_packages(), + keywords=["webscraping", "scraping", "webscraping library", "web scraping", "python webscraping", "beautifulsoup", "selenium"], + classifiers=[ + "Development Status :: 1 - Planning", + "Intended Audience :: Developers", + "Programing Language :: Python", + "Operating System :: Windows" + ], + install_requires=required, +) \ No newline at end of file From bf12c06296ddacaa1ec5e69cfb4f3e1050771fbc Mon Sep 17 00:00:00 2001 From: Shivendra Date: Tue, 3 Sep 2024 23:52:39 +0530 Subject: [PATCH 2/2] deployed the package --- setup.py | 23 ++++++++++++++++------- test.py | 12 +++++------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index ca6d7e4..f833bd8 100644 --- a/setup.py +++ b/setup.py @@ -7,9 +7,6 @@ with codecs.open(os.path.join(current_dir, "README.md"), encoding="utf-8") as file: long_description = "\n" + file.read() -with open("requirements.txt", encoding="utf-8") as f: - required = f.read().splitlines() - VERSION = '1.1.2' DESCRIPTION = 'WebScraping library that scrapes & gathers data from multiple sources on the internet' @@ -17,7 +14,7 @@ name="webgraze", version=VERSION, author="shivendra", - author_email="", + author_email="shivharsh44@gmail.com", description=DESCRIPTION, long_description=long_description, long_description_content_type="text/markdown", @@ -27,8 +24,20 @@ classifiers=[ "Development Status :: 1 - Planning", "Intended Audience :: Developers", - "Programing Language :: Python", - "Operating System :: Windows" + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + ], + install_requires=[ + "bs4", + "tqdm", + "google-api-python-client", + "requests", + "youtube-transcript-api", + "selenium", + "webdriver-manager", ], - install_requires=required, ) \ No newline at end of file diff --git a/test.py b/test.py index 1af6eec..101521c 100644 --- a/test.py +++ b/test.py @@ -1,8 +1,6 @@ -import os -current_directory = os.path.dirname(os.path.abspath(__file__)) -os.chdir(current_directory) +from webgraze import Pexels +from webgraze.queries import Queries -from graze import Freesound - -sound = Freesound(api_key="lMKgKjaRmNMZKKxNqkjx", download_dir="audios", metrics=True) -sound(topics=["clicks", "background", "nature"]) \ No newline at end of file +queries = Queries("images") +scraper = Pexels(directory="./images", metrics=True) +scraper(topics=queries()) \ No newline at end of file