Skip to content

Commit

Permalink
Merge pull request #12 from artiomn/develop
Browse files Browse the repository at this point in the history
Prints were replaced with logging for #10
  • Loading branch information
artiomn authored Jan 27, 2022
2 parents 767969c + 0e95c9e commit d094a44
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 27 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[![Python application](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20application/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/)
[![Python package](https://github.com/artiomn/markdown_images_downloader/workflows/Python%20package/badge.svg)](https://github.com/artiomn/markdown_articles_tool/actions/)
[![License](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)
[![Stargazers](https://img.shields.io/github/stars/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/stargazers)
[![Forks](https://img.shields.io/github/forks/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/network/members)
[![Latest Release](https://img.shields.io/github/v/release/artiomn/markdown_images_downloader.svg)](https://github.com/artiomn/markdown_images_downloader/releases)


# Markdown articles tool 0.0.9
# Markdown articles tool 0.1.0

Free command line utility, written in Python, designed to help you manage online and downloaded Markdown documents (e.g., articles).
The Markdown Articles Tool is available for macOS, Windows, and Linux.
Expand Down
6 changes: 6 additions & 0 deletions markdown_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import argparse
from itertools import permutations
import logging

from mimetypes import types_map

Expand All @@ -26,6 +27,9 @@ def main(arguments):
Entrypoint.
"""

logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%d.%m.%Y %H:%M:%S',
level='DEBUG' if arguments.verbose else 'INFO')

print(f'Markdown tool version {__version__} started...')

processor = ArticleProcessor(skip_list=arguments.skip_list,
Expand Down Expand Up @@ -79,6 +83,8 @@ def main(arguments):
parser.add_argument('-t', '--downloading-timeout', type=float, default=-1,
help='how many seconds to wait before downloading will be failed')
parser.add_argument('-O', '--output-path', type=str, help='article output file name')
parser.add_argument('--verbose', '-v', default=False, action='store_true',
help='More verbose logging')
parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help='return version number')

args = parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion markdown_toolset/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.9'
__version__ = '0.1.0'
9 changes: 5 additions & 4 deletions markdown_toolset/article_processor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from pathlib import Path
from string import Template
from time import strftime
Expand Down Expand Up @@ -34,7 +35,7 @@ def process(self):
skip_list = self._process_skip_list()
article_path, article_base_url = self._get_article()

print(f'File "{article_path}" will be processed...')
logging.info('File "%s" will be processed...', article_path)

article_formatter = get_formatter(self._output_format, FORMATTERS)

Expand All @@ -52,7 +53,7 @@ def process(self):
'base_url': article_base_url.lstrip('https://').lstrip('http://')
}

print(f'Image public path: {Template(self._images_public_path).safe_substitute(**variables)}')
logging.info('Image public path: %s', Template(self._images_public_path).safe_substitute(**variables))

img_downloader = ImageDownloader(
article_path=article_path,
Expand All @@ -70,7 +71,7 @@ def process(self):
format_article(article_out_path, result, article_formatter)

if self._remove_source and article_path != article_out_path:
print(f'Removing source file "{article_path}"...')
logging.info('Removing source file "%s"...', article_path)
Path(article_path).unlink()

def _process_skip_list(self):
Expand All @@ -79,7 +80,7 @@ def _process_skip_list(self):
if isinstance(skip_list, str):
if skip_list.startswith('@'):
skip_list = skip_list[1:]
print(f'Reading skip list from a file "{skip_list}"...')
logging.info('Reading skip list from a file "%s"...', skip_list)
with open(Path(skip_list).expanduser(), 'r') as fsl:
skip_list = [s.strip() for s in fsl.readlines()]
else:
Expand Down
3 changes: 2 additions & 1 deletion markdown_toolset/formatters/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from pathlib import Path
from typing import Any, List

Expand All @@ -15,7 +16,7 @@ def format_article(article_out_path: Path, article_text: str, formatter) -> None
Save article in the selected format.
"""

print(f'Writing file into "{article_out_path}"...')
logging.info('Writing file into "%s"...', article_out_path)

with open(article_out_path, 'wb') as outfile:
outfile.write(formatter.write(article_text))
19 changes: 10 additions & 9 deletions markdown_toolset/image_downloader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from enum import Enum
import hashlib
from pathlib import Path
Expand Down Expand Up @@ -76,18 +77,17 @@ def download_images(self, images: List[str]) -> dict:
assert image_url not in replacement_mapping.keys(), f'BUG: already downloaded image "{image_url}"...'

if image_url in skip_list:
# TODO: Replace with logging.
print(f'Image {image_num + 1} ["{image_url}"] was skipped, because it\'s in the skip list...')
logging.debug('Image %d ["%s"] was skipped, because it\'s in the skip list...', image_num + 1, image_url)
continue

image_path_is_url = is_url(image_url)
if not image_path_is_url and not self._process_local_images:
print(f'Image {image_num + 1} ["{image_url}"] has incorrect URL...')
logging.warning('Image %d ["%s"] has incorrect URL...', image_num + 1, image_url)
if self._article_base_url:
print(f'Trying to add base URL "{self._article_base_url}"...')
logging.debug('Trying to add base URL "%s"...', self._article_base_url)
image_url = f'{self._article_base_url}/{image_url}'
else:
print('Image downloading will be skipped...')
logging.info('Image downloading will be skipped...')
continue

try:
Expand All @@ -96,8 +96,9 @@ def download_images(self, images: List[str]) -> dict:
else ImageDownloader._get_local_image(Path(image_url))
except Exception as e:
if self._skip_all_errors:
print(f'Warning: can\'t get image {image_num + 1}, error: [{str(e)}], '
'but processing will be continued, because `skip_all_errors` flag is set')
logging.warning('Can\'t get image %d, error: [%s], '
'but processing will be continued, because `skip_all_errors` flag is set',
image_num + 1, str(e))
continue
raise

Expand Down Expand Up @@ -130,7 +131,7 @@ def download_images(self, images: List[str]) -> dict:
return replacement_mapping

def _get_remote_image(self, image_url: str, img_num: int, img_count: int):
    """
    Download one remote image and report the progress through ``logging``.

    :param image_url: URL the image is requested from.
    :param img_num: index of the image; shown to the user as ``img_num + 1``.
    :param img_count: total number of images, used only in the progress message.
    :return: tuple of ``get_filename_from_url(response)`` and the response content.
    """

    # All values are passed as lazy %-arguments. The message used to contain a
    # literal "{img_count}" placeholder left over from the f-string it replaced,
    # so the total number of images never reached the log.
    logging.info('Downloading image %d of %d from "%s"...', img_num + 1, img_count, image_url)
    img_response = download_from_url(image_url, self._downloading_timeout)

    return get_filename_from_url(img_response), img_response.content
Expand All @@ -149,7 +150,7 @@ def _write_image(image_path: Path, data: bytes):
"""

# TODO: check if image already exists.
print(f'Image will be written to the file "{image_path}"...')
logging.info('Image will be written to the file "%s"...', image_path)
with open(image_path, 'wb') as image_file:
image_file.write(data)
image_file.close()
Expand Down
12 changes: 6 additions & 6 deletions markdown_toolset/transformers/html/transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Images extractor from HTML document.
"""

import logging
from abc import ABC
from html.parser import HTMLParser
from typing import List, TextIO, Set
Expand All @@ -16,11 +16,11 @@ def __init__(self):

def handle_starttag(self, tag, attrs):
    """
    Remember the ``src`` value of an ``img`` start tag.

    :param tag: name of the tag being opened.
    :param attrs: sequence of ``(name, value)`` pairs belonging to the tag.
    """

    if 'img' != tag:
        return

    logging.info('Image was found...')
    for a in attrs:
        if 'src' == a[0] and a[1] is not None:
            img_url = a[1]
            # Lazy %-arguments, like the other logging calls: the text is only
            # built when DEBUG output is actually enabled.
            logging.debug('Image URL: %s...', img_url)
            self._image_urls.append(img_url)
            # Only the first usable ``src`` of the tag is stored.
            break

Expand All @@ -47,14 +47,14 @@ def __init__(self, article_stream: TextIO, image_downloader):
def _read_article(self) -> Set[str]:
    """
    Feed the whole article stream to the HTML images parser and collect the links.

    :return: set of the unique image URLs exposed by the parser.
    """

    self._html_images.feed(self._article_stream.read())
    images = self._html_images.image_urls
    logging.info('Images links count = %d', len(images))
    images = set(images)
    # Plain %-style template: logging fills in the argument itself, so the
    # string must not carry an ``f`` prefix.
    logging.info('Unique images links count = %d', len(images))

    return images

def _fix_document_urls(self) -> List[str]:
print('Replacing images urls in the document...')
logging.debug('Replacing images urls in the document...')
replacement_mapping = self._replacement_mapping
lines = []
self._article_stream.seek(self._start_pos)
Expand Down
7 changes: 4 additions & 3 deletions markdown_toolset/transformers/md/transformer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Images extractor from markdown document.
"""
import logging

import markdown
from markdown.treeprocessors import Treeprocessor
Expand Down Expand Up @@ -44,14 +45,14 @@ def __init__(self, article_stream: TextIO, image_downloader):

def _read_article(self) -> Set[str]:
    """
    Run the article stream through the Markdown converter and collect the image links.

    :return: set of the unique image links gathered by the converter.
    """

    self._md_conv.convert(self._article_stream.read())
    logging.info('Images links count = %d', len(self._md_conv.images))
    images = set(self._md_conv.images)
    # Reported at INFO, the same level the HTML transformer uses for this
    # message, so both counters stay visible without the verbose flag.
    logging.info('Unique images links count = %d', len(images))

    return images

def _fix_document_urls(self) -> List[str]:
print('Replacing images urls in the document...')
logging.debug('Replacing images urls in the document...')
replacement_mapping = self._replacement_mapping
lines = []
self._article_stream.seek(self._start_pos)
Expand Down
3 changes: 2 additions & 1 deletion markdown_toolset/www_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Some functions useful for the working with URLs and network.
"""
import logging

import requests
from typing import Optional
Expand Down Expand Up @@ -37,7 +38,7 @@ def download_from_url(url: str, timeout=None):
try:
response = requests.get(url, allow_redirects=True, timeout=timeout, headers=NECESSARY_HEADERS)
except requests.exceptions.SSLError:
print('Incorrect SSL certificate, trying to download without verifying...')
logging.warning('Incorrect SSL certificate, trying to download without verifying...')
response = requests.get(url, allow_redirects=True, verify=False,
timeout=timeout, headers=NECESSARY_HEADERS)

Expand Down

0 comments on commit d094a44

Please sign in to comment.