Merge pull request #12 from artiomn/develop

Prints were replaced with logging for #10
artiomn · Jan 27, 2022 · d094a44 · d094a44
2 parents 767969c + 0e95c9e
commit d094a44
Show file tree

Hide file tree

Showing 9 changed files with 38 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -1,11 +1,11 @@
-[![Python application](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20application/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/)
+[![Python package](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20package/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/)
 [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)
 [![Stargazers](https://img.shields.io/github/stars/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/stargazers)
 [![Forks](https://img.shields.io/github/forks/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/network/members)
 [![Latest Release](https://img.shields.io/github/v/release/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/releases)
 
 
-# Markdown articles tool 0.0.9
+# Markdown articles tool 0.1.0
 
 Free command line utility, written in Python, designed to help you manage online and downloaded Markdown documents (e.g., articles).
 The Markdown Articles Tool is available for macOS, Windows, and Linux. 

diff --git a/markdown_tool.py b/markdown_tool.py
@@ -6,6 +6,7 @@
 
 import argparse
 from itertools import permutations
+import logging
 
 from mimetypes import types_map
 
@@ -26,6 +27,9 @@ def main(arguments):
     Entrypoint.
     """
 
+    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%d.%m.%Y %H:%M:%S',
+                        level='DEBUG' if arguments.verbose else 'INFO')
+
     print(f'Markdown tool version {__version__} started...')
 
     processor = ArticleProcessor(skip_list=arguments.skip_list,
@@ -79,6 +83,8 @@ def main(arguments):
     parser.add_argument('-t', '--downloading-timeout', type=float, default=-1,
                         help='how many seconds to wait before downloading will be failed')
     parser.add_argument('-O', '--output-path', type=str, help='article output file name')
+    parser.add_argument('--verbose', '-v', default=False, action='store_true',
+                        help='More verbose logging')
     parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help='return version number')
 
     args = parser.parse_args()

diff --git a/markdown_toolset/__version__.py b/markdown_toolset/__version__.py
@@ -1 +1 @@
-__version__ = '0.0.9'
+__version__ = '0.1.0'
diff --git a/markdown_toolset/article_processor.py b/markdown_toolset/article_processor.py
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 from string import Template
 from time import strftime
@@ -34,7 +35,7 @@ def process(self):
         skip_list = self._process_skip_list()
         article_path, article_base_url = self._get_article()
 
-        print(f'File "{article_path}" will be processed...')
+        logging.info('File "%s" will be processed...', article_path)
 
         article_formatter = get_formatter(self._output_format, FORMATTERS)
 
@@ -52,7 +53,7 @@ def process(self):
             'base_url': article_base_url.lstrip('https://').lstrip('http://')
         }
 
-        print(f'Image public path: {Template(self._images_public_path).safe_substitute(**variables)}')
+        logging.info('Image public path: %s', Template(self._images_public_path).safe_substitute(**variables))
 
         img_downloader = ImageDownloader(
             article_path=article_path,
@@ -70,7 +71,7 @@ def process(self):
         format_article(article_out_path, result, article_formatter)
 
         if self._remove_source and article_path != article_out_path:
-            print(f'Removing source file "{article_path}"...')
+            logging.info('Removing source file "%s"...', article_path)
             Path(article_path).unlink()
 
     def _process_skip_list(self):
@@ -79,7 +80,7 @@ def _process_skip_list(self):
         if isinstance(skip_list, str):
             if skip_list.startswith('@'):
                 skip_list = skip_list[1:]
-                print(f'Reading skip list from a file "{skip_list}"...')
+                logging.info('Reading skip list from a file "%s"...', skip_list)
                 with open(Path(skip_list).expanduser(), 'r') as fsl:
                     skip_list = [s.strip() for s in fsl.readlines()]
             else:

diff --git a/markdown_toolset/formatters/helpers.py b/markdown_toolset/formatters/helpers.py
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 from typing import Any, List
 
@@ -15,7 +16,7 @@ def format_article(article_out_path: Path, article_text: str, formatter) -> None
     Save article in the selected format.
     """
 
-    print(f'Writing file into "{article_out_path}"...')
+    logging.info('Writing file into "%s"...', article_out_path)
 
     with open(article_out_path, 'wb') as outfile:
         outfile.write(formatter.write(article_text))
diff --git a/markdown_toolset/image_downloader.py b/markdown_toolset/image_downloader.py
@@ -1,3 +1,4 @@
+import logging
 from enum import Enum
 import hashlib
 from pathlib import Path
@@ -76,18 +77,17 @@ def download_images(self, images: List[str]) -> dict:
             assert image_url not in replacement_mapping.keys(), f'BUG: already downloaded image "{image_url}"...'
 
             if image_url in skip_list:
-                # TODO: Replace with logging.
-                print(f'Image {image_num + 1} ["{image_url}"] was skipped, because it\'s in the skip list...')
+                logging.debug('Image %d ["%s"] was skipped, because it\'s in the skip list...', image_num + 1, image_url)
                 continue
 
             image_path_is_url = is_url(image_url)
             if not image_path_is_url and not self._process_local_images:
-                print(f'Image {image_num + 1} ["{image_url}"] has incorrect URL...')
+                logging.warning('Image %d ["%s"] has incorrect URL...', image_num + 1, image_url)
                 if self._article_base_url:
-                    print(f'Trying to add base URL "{self._article_base_url}"...')
+                    logging.debug('Trying to add base URL "%s"...', self._article_base_url)
                     image_url = f'{self._article_base_url}/{image_url}'
                 else:
-                    print('Image downloading will be skipped...')
+                    logging.info('Image downloading will be skipped...')
                     continue
 
             try:
@@ -96,8 +96,9 @@ def download_images(self, images: List[str]) -> dict:
                     else ImageDownloader._get_local_image(Path(image_url))
             except Exception as e:
                 if self._skip_all_errors:
-                    print(f'Warning: can\'t get image {image_num + 1}, error: [{str(e)}], '
-                          'but processing will be continued, because `skip_all_errors` flag is set')
+                    logging.warning('Can\'t get image %d, error: [%s], '
+                                    'but processing will be continued, because `skip_all_errors` flag is set',
+                                    image_num + 1, str(e))
                     continue
                 raise
 
@@ -130,7 +131,7 @@ def download_images(self, images: List[str]) -> dict:
         return replacement_mapping
 
     def _get_remote_image(self, image_url: str, img_num: int, img_count: int):
-        print(f'Downloading image {img_num + 1} of {img_count} from "{image_url}"...')
+        logging.info('Downloading image %d of %d from "%s"...', img_num + 1, img_count, image_url)
         img_response = download_from_url(image_url, self._downloading_timeout)
 
         return get_filename_from_url(img_response), img_response.content
@@ -149,7 +150,7 @@ def _write_image(image_path: Path, data: bytes):
         """
 
         # TODO: check if image already exists.
-        print(f'Image will be written to the file "{image_path}"...')
+        logging.info('Image will be written to the file "%s"...', image_path)
         with open(image_path, 'wb') as image_file:
             image_file.write(data)
             image_file.close()

diff --git a/markdown_toolset/transformers/html/transformer.py b/markdown_toolset/transformers/html/transformer.py
@@ -1,7 +1,7 @@
 """
 Images extractor from HTML document.
 """
-
+import logging
 from abc import ABC
 from html.parser import HTMLParser
 from typing import List, TextIO, Set
@@ -16,11 +16,11 @@ def __init__(self):
 
     def handle_starttag(self, tag, attrs):
         if 'img' == tag:
-            print('Image was found...')
+            logging.info('Image was found...')
             for a in attrs:
                 if 'src' == a[0] and a[1] is not None:
                     img_url = a[1]
-                    print(f'Image URL: {img_url}...')
+                    logging.debug('Image URL: %s...', img_url)
                     self._image_urls.append(img_url)
                     break
 
@@ -47,14 +47,14 @@ def __init__(self, article_stream: TextIO, image_downloader):
     def _read_article(self) -> Set[str]:
         self._html_images.feed(self._article_stream.read())
         images = self._html_images.image_urls
-        print(f'Images links count = {len(images)}')
+        logging.info('Images links count = %d', len(images))
         images = set(images)
-        print(f'Unique images links count = {len(images)}')
+        logging.info('Unique images links count = %d', len(images))
 
         return images
 
     def _fix_document_urls(self) -> List[str]:
-        print('Replacing images urls in the document...')
+        logging.debug('Replacing images urls in the document...')
         replacement_mapping = self._replacement_mapping
         lines = []
         self._article_stream.seek(self._start_pos)

diff --git a/markdown_toolset/transformers/md/transformer.py b/markdown_toolset/transformers/md/transformer.py
@@ -1,6 +1,7 @@
 """
 Images extractor from markdown document.
 """
+import logging
 
 import markdown
 from markdown.treeprocessors import Treeprocessor
@@ -44,14 +45,14 @@ def __init__(self, article_stream: TextIO, image_downloader):
 
     def _read_article(self) -> Set[str]:
         self._md_conv.convert(self._article_stream.read())
-        print(f'Images links count = {len(self._md_conv.images)}')
+        logging.info('Images links count = %d', len(self._md_conv.images))
         images = set(self._md_conv.images)
-        print(f'Unique images links count = {len(images)}')
+        logging.debug('Unique images links count = %d', len(images))
 
         return images
 
     def _fix_document_urls(self) -> List[str]:
-        print('Replacing images urls in the document...')
+        logging.debug('Replacing images urls in the document...')
         replacement_mapping = self._replacement_mapping
         lines = []
         self._article_stream.seek(self._start_pos)

diff --git a/markdown_toolset/www_tools.py b/markdown_toolset/www_tools.py
@@ -1,6 +1,7 @@
 """
 Some functions useful for the working with URLs and network.
 """
+import logging
 
 import requests
 from typing import Optional
@@ -37,7 +38,7 @@ def download_from_url(url: str, timeout=None):
     try:
         response = requests.get(url, allow_redirects=True, timeout=timeout, headers=NECESSARY_HEADERS)
     except requests.exceptions.SSLError:
-        print('Incorrect SSL certificate, trying to download without verifying...')
+        logging.warning('Incorrect SSL certificate, trying to download without verifying...')
         response = requests.get(url, allow_redirects=True, verify=False,
                                 timeout=timeout, headers=NECESSARY_HEADERS)