Skip to content

Commit

Permalink
Merge pull request #2 from sentenzo/dev
Browse files Browse the repository at this point in the history
`dev` => `master` (some features + refactoring)
  • Loading branch information
sentenzo authored Sep 22, 2022
2 parents b419290 + 1c99cd5 commit 3975e45
Show file tree
Hide file tree
Showing 14 changed files with 296 additions and 247 deletions.
20 changes: 16 additions & 4 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,33 @@ bot:
token: # 0123456789:will_be_taken_from_.env
allowed_usernames: ["durov"] # will be enriched from .env

utils:
thirdparty:
balcon:
path: c:\bin\balcon.exe
defaults:
ru:
voice: Microsoft Irina Desktop
speed: 8
volume: 100
pitch: 0
en:
voice: Microsoft Zira Desktop
speed: 4
volume: 100
pitch: 0
ffmpeg:
path: c:\bin\ffmpeg.exe
blb2txt:
path: c:\bin\blb2txt.exe

to_txt:
text:
max_input_size: 5242880 # 5 * 2**20 bytes == 5 MiB
input_formats: [".epub", ".fb2", ".fb3", ".md", ".txt", ".doc", ".docx", ".rtf", ".html", ".htm"]

url_parser:
web_parser:
sites:
habr:
url_re: https\://habr\.com/..(/company/[-\w]+/blog|/post)/\d+
url_re: https\://habr\.com/..(/company/[-\w]+/blog|/post|/news/t)/\d+
lang: ru
re:
title: main h1[data-test-id="articleTitle"] span
Expand Down
Binary file modified narrator.webp
Binary file not shown.
12 changes: 11 additions & 1 deletion narrator/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ class NarratorException(Exception):
"""


###


class UrlParserException(NarratorException):
pass

Expand All @@ -16,5 +19,12 @@ class UrlUnreachable(UrlParserException):
pass


class TxtTransformerException(NarratorException):
class ParsingRulesNotFound(UrlParserException):
pass


###


class TextException(NarratorException):
pass
99 changes: 0 additions & 99 deletions narrator/sub_utils.py

This file was deleted.

36 changes: 12 additions & 24 deletions narrator/telegram/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,9 @@
from aiogram.types import Message, Document

from narrator.exceptions import UrlParserException
import narrator.url_parser as url_parser
import narrator.to_txt as to_txt
from narrator.text import Text
from narrator.sub_utils import (
balcon,
blb2txt,
ffmpeg__to_mp3,
add_suffix,
crop_suffix,
make_filename,
)
import narrator.text.web_parser as web_parser
from narrator.text.text import Text
from narrator.utils import make_filename


class ValidityCheckResult(NamedTuple):
Expand All @@ -39,10 +31,10 @@ async def produce_audio_file(self, directory: str) -> str:
class UrlWorker(BaseWorker):
def __init__(self, bot: Bot, message: Message) -> None:
super().__init__(bot, message)
self._url = url_parser.Url(self._message.text)
self._url = web_parser.Url(self._message.text)

def check_validity(self) -> ValidityCheckResult:
url: url_parser.Url = self._url
url: web_parser.Url = self._url
if not url.is_valid:
description = "Not a valid url"
return ValidityCheckResult(False, description, description)
Expand All @@ -63,10 +55,7 @@ def check_validity(self) -> ValidityCheckResult:

async def produce_audio_file(self, directory: str) -> str:
text: Text = self._url.parse()
# balcon.exe only works with UTF-8-BOM (or "utf-8-sig")
txt_path = text.save_to_txt(directory, encoding="utf-8-sig")
wav_path = balcon(txt_path)
mp3_path = ffmpeg__to_mp3(wav_path)
mp3_path = text.save_to_mp3(directory)
return mp3_path


Expand All @@ -85,15 +74,15 @@ def __init__(self, bot: Bot, message: Message) -> None:

def check_validity(self) -> ValidityCheckResult:
doc = self._doc
if not to_txt.has_proper_extention(doc.file_name):
if not Text.has_proper_extention(doc.file_name):
description = "The file extention is not supported"
user_description = "The file extention is not supported\n"
user_description += "List of supported extentions:\n"
user_description += " " + ", ".join(to_txt.INPUT_FORMATS)
user_description += " " + ", ".join(Text.INPUT_FORMATS)
return ValidityCheckResult(False, description, user_description)
elif doc.file_size > to_txt.MAX_INPUT_SIZE:
elif doc.file_size > Text.MAX_INPUT_SIZE:
s_cur_mib = DocWorker._bytes_to_mib_str(doc.file_size)
s_max_mib = DocWorker._bytes_to_mib_str(to_txt.MAX_INPUT_SIZE)
s_max_mib = DocWorker._bytes_to_mib_str(Text.MAX_INPUT_SIZE)
description = f"The file is too big: the file size is {s_cur_mib} MiB, and the max size allowed is {s_max_mib} MiB)"
return ValidityCheckResult(False, description, description)
return ValidityCheckResult(True)
Expand All @@ -102,7 +91,6 @@ async def produce_audio_file(self, directory: str) -> str:
filename = make_filename(self._doc.file_name)
file_path = os.path.join(directory, filename)
await self._bot.download(self._doc, file_path)
txt_path = blb2txt(file_path)
wav_path = balcon(txt_path)
mp3_path = ffmpeg__to_mp3(wav_path)
text = Text.from_file(file_path)
mp3_path = text.save_to_mp3(directory)
return mp3_path
83 changes: 0 additions & 83 deletions narrator/text.py

This file was deleted.

25 changes: 25 additions & 0 deletions narrator/text/language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import string
from enum import Enum


class Language(Enum):
RU: str = "ru"
EN: str = "en"

@staticmethod
def get_val(ch: str) -> str | None:
for lang_val in ALPHABETS:
if ch in ALPHABETS[lang_val]:
return lang_val
return None


EN_ALPHABET = string.ascii_letters

RU_ALPHABET = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя"
RU_ALPHABET += RU_ALPHABET.upper()

ALPHABETS = {
Language.RU.value: RU_ALPHABET,
Language.EN.value: EN_ALPHABET,
}
Loading

0 comments on commit 3975e45

Please sign in to comment.