diff --git a/README.md b/README.md index 23c68ee..bf8c07b 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -[![Python application](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20application/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/) +[![Python package](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20package/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT) [![Stargazers](https://img.shields.io/github/stars/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/stargazers) [![Forks](https://img.shields.io/github/forks/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/network/members) [![Latest Release](https://img.shields.io/github/v/release/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/releases) -# Markdown articles tool 0.0.9 +# Markdown articles tool 0.1.0 Free command line utility, written in Python, designed to help you manage online and downloaded Markdown documents (e.g., articles). The Markdown Articles Tool is available for macOS, Windows, and Linux. diff --git a/markdown_tool.py b/markdown_tool.py index 54b8e0f..83c19d7 100755 --- a/markdown_tool.py +++ b/markdown_tool.py @@ -6,6 +6,7 @@ import argparse from itertools import permutations +import logging from mimetypes import types_map @@ -26,6 +27,9 @@ def main(arguments): Entrypoint. 
""" + logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%d.%m.%Y %H:%M:%S', + level='DEBUG' if arguments.verbose else 'INFO') + print(f'Markdown tool version {__version__} started...') processor = ArticleProcessor(skip_list=arguments.skip_list, @@ -79,6 +83,8 @@ def main(arguments): parser.add_argument('-t', '--downloading-timeout', type=float, default=-1, help='how many seconds to wait before downloading will be failed') parser.add_argument('-O', '--output-path', type=str, help='article output file name') + parser.add_argument('--verbose', '-v', default=False, action='store_true', + help='More verbose logging') parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help='return version number') args = parser.parse_args() diff --git a/markdown_toolset/__version__.py b/markdown_toolset/__version__.py index 9d1ffab..b794fd4 100644 --- a/markdown_toolset/__version__.py +++ b/markdown_toolset/__version__.py @@ -1 +1 @@ -__version__ = '0.0.9' +__version__ = '0.1.0' diff --git a/markdown_toolset/article_processor.py b/markdown_toolset/article_processor.py index 5fbc460..7ee8d38 100644 --- a/markdown_toolset/article_processor.py +++ b/markdown_toolset/article_processor.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from string import Template from time import strftime @@ -34,7 +35,7 @@ def process(self): skip_list = self._process_skip_list() article_path, article_base_url = self._get_article() - print(f'File "{article_path}" will be processed...') + logging.info('File "%s" will be processed...', article_path) article_formatter = get_formatter(self._output_format, FORMATTERS) @@ -52,7 +53,7 @@ def process(self): 'base_url': article_base_url.lstrip('https://').lstrip('http://') } - print(f'Image public path: {Template(self._images_public_path).safe_substitute(**variables)}') + logging.info('Image public path: %s', Template(self._images_public_path).safe_substitute(**variables)) img_downloader = ImageDownloader( 
article_path=article_path, @@ -70,7 +71,7 @@ def process(self): format_article(article_out_path, result, article_formatter) if self._remove_source and article_path != article_out_path: - print(f'Removing source file "{article_path}"...') + logging.info('Removing source file "%s"...', article_path) Path(article_path).unlink() def _process_skip_list(self): @@ -79,7 +80,7 @@ def _process_skip_list(self): if isinstance(skip_list, str): if skip_list.startswith('@'): skip_list = skip_list[1:] - print(f'Reading skip list from a file "{skip_list}"...') + logging.info('Reading skip list from a file "%s"...', skip_list) with open(Path(skip_list).expanduser(), 'r') as fsl: skip_list = [s.strip() for s in fsl.readlines()] else: diff --git a/markdown_toolset/formatters/helpers.py b/markdown_toolset/formatters/helpers.py index 8072a41..a3bf741 100644 --- a/markdown_toolset/formatters/helpers.py +++ b/markdown_toolset/formatters/helpers.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from typing import Any, List @@ -15,7 +16,7 @@ def format_article(article_out_path: Path, article_text: str, formatter) -> None Save article in the selected format. """ - print(f'Writing file into "{article_out_path}"...') + logging.info('Writing file into "%s"...', article_out_path) with open(article_out_path, 'wb') as outfile: outfile.write(formatter.write(article_text)) diff --git a/markdown_toolset/image_downloader.py b/markdown_toolset/image_downloader.py index af69192..46eb370 100644 --- a/markdown_toolset/image_downloader.py +++ b/markdown_toolset/image_downloader.py @@ -1,3 +1,4 @@ +import logging from enum import Enum import hashlib from pathlib import Path @@ -76,18 +77,17 @@ def download_images(self, images: List[str]) -> dict: assert image_url not in replacement_mapping.keys(), f'BUG: already downloaded image "{image_url}"...' if image_url in skip_list: - # TODO: Replace with logging. 
- print(f'Image {image_num + 1} ["{image_url}"] was skipped, because it\'s in the skip list...') + logging.debug('Image %d ["%s"] was skipped, because it\'s in the skip list...', image_num + 1, image_url) continue image_path_is_url = is_url(image_url) if not image_path_is_url and not self._process_local_images: - print(f'Image {image_num + 1} ["{image_url}"] has incorrect URL...') + logging.warning('Image %d ["%s"] has incorrect URL...', image_num + 1, image_url) if self._article_base_url: - print(f'Trying to add base URL "{self._article_base_url}"...') + logging.debug('Trying to add base URL "%s"...', self._article_base_url) image_url = f'{self._article_base_url}/{image_url}' else: - print('Image downloading will be skipped...') + logging.info('Image downloading will be skipped...') continue try: @@ -96,8 +96,9 @@ def download_images(self, images: List[str]) -> dict: else ImageDownloader._get_local_image(Path(image_url)) except Exception as e: if self._skip_all_errors: - print(f'Warning: can\'t get image {image_num + 1}, error: [{str(e)}], ' - 'but processing will be continued, because `skip_all_errors` flag is set') + logging.warning('Can\'t get image %d, error: [%s], ' + 'but processing will be continued, because `skip_all_errors` flag is set', + image_num + 1, str(e)) continue raise @@ -130,7 +131,7 @@ def download_images(self, images: List[str]) -> dict: return replacement_mapping def _get_remote_image(self, image_url: str, img_num: int, img_count: int): - print(f'Downloading image {img_num + 1} of {img_count} from "{image_url}"...') + logging.info('Downloading image %d of %d from "%s"...', img_num + 1, img_count, image_url) img_response = download_from_url(image_url, self._downloading_timeout) return get_filename_from_url(img_response), img_response.content @@ -149,7 +150,7 @@ def _write_image(image_path: Path, data: bytes): """ # TODO: check if image already exists. 
- print(f'Image will be written to the file "{image_path}"...') + logging.info('Image will be written to the file "%s"...', image_path) with open(image_path, 'wb') as image_file: image_file.write(data) image_file.close() diff --git a/markdown_toolset/transformers/html/transformer.py b/markdown_toolset/transformers/html/transformer.py index e6e24a1..23b5a07 100644 --- a/markdown_toolset/transformers/html/transformer.py +++ b/markdown_toolset/transformers/html/transformer.py @@ -1,7 +1,7 @@ """ Images extractor from HTML document. """ - +import logging from abc import ABC from html.parser import HTMLParser from typing import List, TextIO, Set @@ -16,11 +16,11 @@ def __init__(self): def handle_starttag(self, tag, attrs): if 'img' == tag: - print('Image was found...') + logging.info('Image was found...') for a in attrs: if 'src' == a[0] and a[1] is not None: img_url = a[1] - print(f'Image URL: {img_url}...') + logging.debug('Image URL: %s...', img_url) self._image_urls.append(img_url) break @@ -47,14 +47,14 @@ def __init__(self, article_stream: TextIO, image_downloader): def _read_article(self) -> Set[str]: self._html_images.feed(self._article_stream.read()) images = self._html_images.image_urls - print(f'Images links count = {len(images)}') + logging.info('Images links count = %d', len(images)) images = set(images) - print(f'Unique images links count = {len(images)}') + logging.info('Unique images links count = %d', len(images)) return images def _fix_document_urls(self) -> List[str]: - print('Replacing images urls in the document...') + logging.debug('Replacing images urls in the document...') replacement_mapping = self._replacement_mapping lines = [] self._article_stream.seek(self._start_pos) diff --git a/markdown_toolset/transformers/md/transformer.py b/markdown_toolset/transformers/md/transformer.py index 9aac921..bf95484 100644 --- a/markdown_toolset/transformers/md/transformer.py +++ b/markdown_toolset/transformers/md/transformer.py @@ -1,6 +1,7 @@ """ Images 
extractor from markdown document. """ +import logging import markdown from markdown.treeprocessors import Treeprocessor @@ -44,14 +45,14 @@ def __init__(self, article_stream: TextIO, image_downloader): def _read_article(self) -> Set[str]: self._md_conv.convert(self._article_stream.read()) - print(f'Images links count = {len(self._md_conv.images)}') + logging.info('Images links count = %d', len(self._md_conv.images)) images = set(self._md_conv.images) - print(f'Unique images links count = {len(images)}') + logging.debug('Unique images links count = %d', len(images)) return images def _fix_document_urls(self) -> List[str]: - print('Replacing images urls in the document...') + logging.debug('Replacing images urls in the document...') replacement_mapping = self._replacement_mapping lines = [] self._article_stream.seek(self._start_pos) diff --git a/markdown_toolset/www_tools.py b/markdown_toolset/www_tools.py index 708e2f0..bc2e58c 100644 --- a/markdown_toolset/www_tools.py +++ b/markdown_toolset/www_tools.py @@ -1,6 +1,7 @@ """ Some functions useful for the working with URLs and network. """ +import logging import requests from typing import Optional @@ -37,7 +38,7 @@ def download_from_url(url: str, timeout=None): try: response = requests.get(url, allow_redirects=True, timeout=timeout, headers=NECESSARY_HEADERS) except requests.exceptions.SSLError: - print('Incorrect SSL certificate, trying to download without verifying...') + logging.warning('Incorrect SSL certificate, trying to download without verifying...') response = requests.get(url, allow_redirects=True, verify=False, timeout=timeout, headers=NECESSARY_HEADERS)