Skip to content

Commit

Permalink
Merge commit 'de39d1281cea499cb1adfce5ff7e0a56f1bad5fe'
Browse files Browse the repository at this point in the history
* commit 'de39d1281cea499cb1adfce5ff7e0a56f1bad5fe':
  [extractor/ceskatelevize] Back-port extractor from yt-dlp, etc (ytdl-org#30713)
  [utils] Backport traverse_obj (etc) from yt-dlp (ytdl-org#31156)
  [compat] Work around in case folding for narrow Python build
  [compat] Add test for compat_casefold()
  [compat] Add test for compat_casefold()
  [compat] Reformat casefold.py for easier updating
  [compat] Unify unicode/str compat and move up
  [compat] Add compat_casefold and compat_re_Match, for traverse_obj() port
  [compat] Add Python 2 Unicode casefold using a trivial wrapper around icu/CaseFolding.txt
  [netease] Support urls shared from mobile app (ytdl-org#31304)
  [netease] Improve error handling (ytdl-org#31303)
  [Vimeo] Update variable name in hydration JSON pattern

# Conflicts:
#	youtube_dl/extractor/ceskatelevize.py
  • Loading branch information
gaming-hacker committed Nov 4, 2022
2 parents cd79f63 + de39d12 commit 7d58379
Show file tree
Hide file tree
Showing 8 changed files with 2,401 additions and 130 deletions.
15 changes: 14 additions & 1 deletion test/test_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@


from youtube_dl.compat import (
compat_casefold,
compat_getenv,
compat_setenv,
compat_etree_Element,
Expand Down Expand Up @@ -122,9 +123,21 @@ def test_compat_etree_fromstring_doctype(self):
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
compat_etree_fromstring(xml)

def test_struct_unpack(self):
def test_compat_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

def test_compat_casefold(self):
    """Check the fallback implementation behind compat_casefold().

    When the text type already has a native ``casefold`` method the test
    returns early, so the assertions below only run against the bundled
    replacement.
    """
    if hasattr(compat_str, 'casefold'):
        # don't bother to test str.casefold() (again)
        return
    # thanks https://bugs.python.org/file24232/casefolding.patch
    self.assertEqual(compat_casefold('hello'), 'hello')
    self.assertEqual(compat_casefold('hELlo'), 'hello')
    self.assertEqual(compat_casefold('ß'), 'ss')
    # The input must be the single code point U+FB01 (LATIN SMALL LIGATURE
    # FI), which full case folding expands to the two letters 'fi'.  It is
    # written as an escape so that it cannot be silently flattened into the
    # plain ASCII pair, which would make this assertion vacuous.
    self.assertEqual(compat_casefold('\ufb01'), 'fi')
    self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
    self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')


if __name__ == '__main__':
unittest.main()
323 changes: 323 additions & 0 deletions test/test_utils.py

Large diffs are not rendered by default.

1,667 changes: 1,667 additions & 0 deletions youtube_dl/casefold.py

Large diffs are not rendered by default.

34 changes: 18 additions & 16 deletions youtube_dl/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,19 @@
import sys
import xml.etree.ElementTree

# deal with critical unicode/str things first
try:
    # Python 2: these builtins exist only there, so referencing them is
    # what distinguishes the two interpreter generations
    compat_str, compat_basestring, compat_chr = (
        unicode, basestring, unichr
    )
except NameError:
    # Python 3: the builtins above are gone; `str` is the text type
    compat_str, compat_basestring, compat_chr = (
        str, str, chr
    )

    def compat_casefold(s):
        """Return `s` case-folded by delegating to its own casefold() method."""
        return s.casefold()
else:
    # Only reached when the Python 2 names resolved.  The import is kept out
    # of the `try` body so that a NameError raised while loading the
    # casefold module propagates instead of being mistaken for "this is
    # Python 3" and silently selecting the wrong set of aliases.
    from .casefold import casefold as compat_casefold

try:
import collections.abc as compat_collections_abc
except ImportError:
Expand Down Expand Up @@ -2373,11 +2386,6 @@ class compat_HTMLParseError(Exception):
except ImportError:
import BaseHTTPServer as compat_http_server

try:
compat_str = unicode # Python 2
except NameError:
compat_str = str

try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
Expand Down Expand Up @@ -2508,22 +2516,11 @@ def data_open(self, req):

return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)

try:
compat_basestring = basestring # Python 2
except NameError:
compat_basestring = str

try:
compat_chr = unichr # Python 2
except NameError:
compat_chr = chr

try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error


etree = xml.etree.ElementTree


Expand Down Expand Up @@ -3069,6 +3066,9 @@ def parents(self):

# The class of a compiled regex has different names across Python versions
# (RegexObject, _sre.SRE_Pattern, Pattern, ...), so take it from a live object
compat_re_Pattern = type(re.compile(''))
# same approach for match objects: use the class of an actual successful match
compat_re_Match = type(re.compile('a').match('a'))


if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
Expand Down Expand Up @@ -3104,6 +3104,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
'compat_Struct',
'compat_b64decode',
'compat_basestring',
'compat_casefold',
'compat_chr',
'compat_collections_abc',
'compat_collections_chain_map',
Expand Down Expand Up @@ -3135,6 +3136,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
'compat_os_name',
'compat_parse_qs',
'compat_print',
'compat_re_Match',
'compat_re_Pattern',
'compat_realpath',
'compat_setenv',
Expand Down
109 changes: 1 addition & 108 deletions youtube_dl/extractor/ceskatelevize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,118 +13,11 @@
float_or_none,
sanitized_Request,
str_or_none,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
)

try:
from ..utils import traverse_obj
except ImportError:
from ..compat import (compat_str, compat_collections_abc)
from ..utils import int_or_none

def traverse_obj(obj, *path_list, **kw):
''' Traverse nested list/dict/tuple'''

# parameter defaults
default = kw.get('default')
expected_type = kw.get('expected_type')
get_all = kw.get('get_all', True)
casesense = kw.get('casesense', True)
is_user_input = kw.get('is_user_input', False)
traverse_string = kw.get('traverse_string', False)

def variadic(x, allowed_types=(compat_str, bytes)):
return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)

def listish(l):
return isinstance(l, (list, tuple))

def from_iterable(iterables):
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
for it in iterables:
for element in it:
yield element

class Nonlocal:
pass
nl = Nonlocal()

if not casesense:
_lower = lambda k: (k.lower() if isinstance(k, compat_str) else k)
path_list = (map(_lower, variadic(path)) for path in path_list)

def _traverse_obj(obj, path, _current_depth=0):
path = tuple(variadic(path))
for i, key in enumerate(path):
if obj is None:
return None
if listish(key):
obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
key = Ellipsis
if key is Ellipsis:
obj = (obj.values() if isinstance(obj, dict)
else obj if listish(obj)
else compat_str(obj) if traverse_string else [])
_current_depth += 1
nl.depth = max(nl.depth, _current_depth)
return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
elif callable(key):
if listish(obj):
obj = enumerate(obj)
elif isinstance(obj, dict):
obj = obj.items()
else:
if not traverse_string:
return None
obj = str(obj)
_current_depth += 1
nl.depth = max(nl.depth, _current_depth)
return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
elif isinstance(obj, dict) and not (is_user_input and key == ':'):
obj = (obj.get(key) if casesense or (key in obj)
else next((v for k, v in obj.items() if _lower(k) == key), None))
else:
if is_user_input:
key = (int_or_none(key) if ':' not in key
else slice(*map(int_or_none, key.split(':'))))
if key == slice(None):
return _traverse_obj(obj, tuple([Ellipsis] + list(path[i + 1:])), _current_depth)
if not isinstance(key, (int, slice)):
return None
if not listish(obj):
if not traverse_string:
return None
obj = compat_str(obj)
try:
obj = obj[key]
except IndexError:
return None
return obj

if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
elif expected_type is not None:
type_test = expected_type
else:
type_test = lambda val: val

for path in path_list:
nl.depth = 0
val = _traverse_obj(obj, path)
if val is not None:
if nl.depth:
for _ in range(nl.depth - 1):
val = from_iterable(v for v in val if v is not None)
val = [v for v in map(type_test, val) if v is not None]
if val:
return val if get_all else val[0]
else:
val = type_test(val)
if val is not None:
return val
return default


class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
Expand Down
42 changes: 38 additions & 4 deletions youtube_dl/extractor/neteasemusic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ..utils import (
ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none,
intlist_to_bytes,
Expand Down Expand Up @@ -94,17 +95,23 @@ def _call_player_api(self, song_id, bitrate):
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
try:
return self._download_json(
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception:
pass
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}

def extract_formats(self, info):
err = 0
formats = []
song_id = info['id']
for song_format in self._FORMATS:
Expand All @@ -116,6 +123,8 @@ def extract_formats(self, info):
data = self._call_player_api(song_id, bitrate)
for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if not song_url:
continue
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
Expand All @@ -125,6 +134,19 @@ def extract_formats(self, info):
'filesize': int_or_none(song.get('size')),
'asr': int_or_none(details.get('sr')),
})
elif err == 0:
err = try_get(song, lambda x: x['code'], int)

if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else:
self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])

return formats

@classmethod
Expand All @@ -140,7 +162,7 @@ def query_api(self, endpoint, video_id, note):
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
'md5': '3e909614ce09b1ccef4a3eb205441190',
Expand Down Expand Up @@ -178,6 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d',
'info_dict': {
'id': '95670',
'ext': 'mp3',
'title': '国际歌',
'creator': '马备',
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
},
}]

def _process_lyrics(self, lyrics_info):
Expand Down
2 changes: 1 addition & 1 deletion youtube_dl/extractor/vimeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def _real_extract(self, url):

if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex(
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
Expand Down
Loading

0 comments on commit 7d58379

Please sign in to comment.