Skip to content

Commit

Permalink
[bot] AutoMerging: merge all upstream's changes:
Browse files Browse the repository at this point in the history
* https://github.com/ytdl-org/youtube-dl:
  [netease] Get netease music download url through player api (ytdl-org#31235)
  [Common:JWPlayer] Fix x1000 scaling error
  [utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames
  [JSInterp] Improve separation logic
  [ZDF] Overhaul ZDF extractors * pull some yt-dlp changes into ZDFBaseIE._extract_format() * add test cases from yt-dlp to ZDFIE * fix crash in ZDFIE._extract_mobile() when object had no `formitaeten` * improve title extraction in ZDFChannelIE (remove trailing station ident) * avoid extracting non-video playlist items (fixes ytdl-org#31149)
  [test] Implement string "lambda x: condition(x)" as an expected value
  [motherless] Fixed the broken uploader_id in the extractor (ytdl-org#31243)
  • Loading branch information
github-actions[bot] committed Oct 11, 2022
2 parents 9e74a5d + c91cbf6 commit 75903c9
Show file tree
Hide file tree
Showing 9 changed files with 251 additions and 126 deletions.
8 changes: 7 additions & 1 deletion test/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def expect_value(self, got, expected, field):
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (field, got, contains_str))
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
fn = eval(expected)
suite = expected.split(':', 1)[1].strip()
self.assertTrue(
fn(got),
'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
elif isinstance(expected, type):
self.assertTrue(
isinstance(got, expected),
Expand All @@ -137,7 +143,7 @@ def expect_value(self, got, expected, field):
elif isinstance(expected, list) and isinstance(got, list):
self.assertEqual(
len(expected), len(got),
'Expect a list of length %d, but got a list of length %d for field %s' % (
'Expected a list of length %d, but got a list of length %d for field %s' % (
len(expected), len(got), field))
for index, (item_got, item_expected) in enumerate(zip(got, expected)):
type_got = type(item_got)
Expand Down
9 changes: 8 additions & 1 deletion test/test_aes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64

Expand Down Expand Up @@ -58,6 +58,13 @@ def test_decrypt_text(self):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)

def test_ecb_encrypt(self):
    """Check that AES-ECB encryption of the fixture message yields the known ciphertext."""
    # The AES helpers operate on lists of ints, so convert at both ends.
    cleartext = bytes_to_intlist(self.secret_msg)
    ciphertext = aes_ecb_encrypt(cleartext, self.key)
    expected = b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:'
    self.assertEqual(intlist_to_bytes(ciphertext), expected)


if __name__ == '__main__':
unittest.main()
37 changes: 35 additions & 2 deletions youtube_dl/aes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@
BLOCK_SIZE_BYTES = 16


def pkcs7_padding(data, block_size=BLOCK_SIZE_BYTES):
    """
    PKCS#7 padding

    Always appends between 1 and block_size padding values, each equal to
    the number of values appended. Input whose length is already a multiple
    of block_size (including empty input) therefore gains one whole block
    of padding.

    @param {int[]} data        cleartext
    @param {int}   block_size  block length in bytes (1..255),
                               defaults to BLOCK_SIZE_BYTES
    @returns {int[]}           padded data (a new list; data is not modified)
    @raises ValueError         if block_size is outside 1..255
    """
    # A padding value must fit in a single byte and be non-zero.
    if not 0 < block_size < 256:
        raise ValueError(
            'PKCS#7 block size must be in 1..255, got %r' % (block_size, ))

    remaining_length = block_size - len(data) % block_size
    return data + [remaining_length] * remaining_length


def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
Expand Down Expand Up @@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
remaining_length = BLOCK_SIZE_BYTES - len(block)
block += [remaining_length] * remaining_length
block = pkcs7_padding(block)
mixed_block = xor(block, previous_cipher_block)

encrypted_block = aes_encrypt(mixed_block, expanded_key)
Expand All @@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
return encrypted_data


def aes_ecb_encrypt(data, key):
    """
    Encrypt with aes in ECB mode. Using PKCS#7 padding

    The cleartext is cut into BLOCK_SIZE_BYTES-sized chunks. Each chunk is
    passed through pkcs7_padding() and encrypted on its own with the same
    expanded key, and the encrypted chunks are concatenated in order.

    @param {int[]} data        cleartext
    @param {int[]} key         16/24/32-Byte cipher key
    @returns {int[]}           encrypted data
    """
    expanded_key = key_expansion(key)

    encrypted_data = []
    # Stepping by the block size visits ceil(len(data) / BLOCK_SIZE_BYTES)
    # chunks; only the last one can be shorter than a full block.
    for offset in range(0, len(data), BLOCK_SIZE_BYTES):
        # NOTE(review): pkcs7_padding() always appends at least one value, so
        # a full chunk is longer than one block when it reaches aes_encrypt();
        # presumably aes_encrypt() only consumes the first block - confirm.
        chunk = pkcs7_padding(data[offset:offset + BLOCK_SIZE_BYTES])
        encrypted_data.extend(aes_encrypt(chunk, expanded_key))

    return encrypted_data


def key_expansion(data):
"""
Generate key schedule
Expand Down
2 changes: 1 addition & 1 deletion youtube_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2844,7 +2844,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
'url': source_url,
'width': int_or_none(source.get('width')),
'height': height,
'tbr': int_or_none(source.get('bitrate')),
'tbr': int_or_none(source.get('bitrate'), scale=1000),
'ext': ext,
}
if source_url.startswith('rtmp'):
Expand Down
13 changes: 7 additions & 6 deletions youtube_dl/extractor/motherless.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals

import datetime
Expand Down Expand Up @@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
'uploader_id': 'yonbiw',
'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
Expand Down Expand Up @@ -127,7 +128,7 @@ def _real_extract(self, url):

comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex(
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''',
webpage, 'uploader_id')

categories = self._html_search_meta('keywords', webpage, default=None)
Expand Down Expand Up @@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor):
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
'playlist_mincount': 9,
'playlist_mincount': 0,
}]

@classmethod
Expand Down Expand Up @@ -208,9 +209,9 @@ def _real_extract(self, url):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = self._int(self._search_regex(
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
webpage, 'page_count'), 'page_count')
page_count = str_to_int(self._search_regex(
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default='1'))
PAGE_SIZE = 80

def _get_page(idx):
Expand Down
123 changes: 86 additions & 37 deletions youtube_dl/extractor/neteasemusic.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
# coding: utf-8
from __future__ import unicode_literals

from hashlib import md5
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint
import json
import re
import time

from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
compat_itertools_count,
)
from ..utils import (
sanitized_Request,
ExtractorError,
bytes_to_intlist,
float_or_none,
int_or_none,
intlist_to_bytes,
sanitized_Request,
std_headers,
try_get,
)


Expand All @@ -35,32 +46,85 @@ def _encrypt(cls, dfsid):
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')

@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
    """
    Build the encrypted POST body and the HTTP headers for the player URL API.

    The JSON request is wrapped together with an MD5 digest, PKCS#7-padded,
    AES-ECB encrypted with a fixed key and upper-case hex encoded.

    @param song_id   id of the song, placed in the request's 'ids' field
    @param bitrate   requested bitrate, placed in the request's 'br' field
    @returns         tuple (body, headers): body is the string
                     'params=<UPPERCASE HEX>', headers is a dict of
                     HTTP request headers including the Cookie
    """
    aes_key = b'e82ckenh8dichen8'
    api_path = '/api/song/enhance/player/url'
    timestamp_ms = int(time.time() * 1000)
    nonce = randint(0, 1000)
    # The same fields go both into the JSON payload (as 'header') and into
    # the Cookie header; None values are rendered as 'undefined' in the cookie.
    client_info = {
        'osver': None,
        'deviceId': None,
        'appver': '8.0.0',
        'versioncode': '140',
        'mobilename': None,
        'buildver': '1623435496',
        'resolution': '1920x1080',
        '__csrf': '',
        'os': 'pc',
        'channel': None,
        'requestId': '{0}_{1:04}'.format(timestamp_ms, nonce),
    }
    payload = {
        'ids': '[{0}]'.format(song_id),
        'br': bitrate,
        'header': client_info,
    }
    request_text = json.dumps(payload, separators=(',', ':'))

    # Digest over the API path and the request text.
    digest_input = 'nobody{0}use{1}md5forencrypt'.format(api_path, request_text)
    msg_digest = md5(digest_input.encode('latin1')).hexdigest()

    plaintext = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
        api_path, request_text, msg_digest)
    padded = pkcs7_padding(bytes_to_intlist(plaintext))
    ciphertext = intlist_to_bytes(
        aes_ecb_encrypt(padded, bytes_to_intlist(aes_key)))
    encrypted_params = hexlify(ciphertext).decode('ascii').upper()

    cookie_header = '; '.join(
        '{0}={1}'.format(name, 'undefined' if value is None else value)
        for name, value in client_info.items())

    headers = {
        'User-Agent': std_headers['User-Agent'],
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://music.163.com',
        'Cookie': cookie_header,
    }
    return ('params={0}'.format(encrypted_params), headers)

def _call_player_api(self, song_id, bitrate):
    """
    Query the player URL API for one song at the requested bitrate.

    @param song_id   id of the song to look up
    @param bitrate   requested bitrate, forwarded to the request builder
    @returns         the decoded JSON response, or an empty dict when the
                     request failed for a reason other than a JSON load
                     failure
    @raises ExtractorError  when the failure's cause is a ValueError or
                            TypeError (i.e. the response could not be
                            loaded as JSON)
    """
    url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
    data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
    try:
        return self._download_json(
            url, song_id, data=data.encode('ascii'), headers=headers)
    except ExtractorError as e:
        # isinstance() rather than an exact type() match: on Python 3 the
        # json module raises JSONDecodeError, a ValueError subclass, which
        # an exact type comparison would never recognise.
        if isinstance(e.cause, (ValueError, TypeError)):
            # JSON load failure
            raise
    except Exception:
        # Deliberate best effort: any other failure is treated as
        # "no data for this bitrate" so the caller can carry on.
        pass
    return {}

def extract_formats(self, info):
formats = []
song_id = info['id']
for song_format in self._FORMATS:
details = info.get(song_format)
if not details:
continue
song_file_path = '/%s/%s.%s' % (
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])

# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
# from NetEase's CDN provider that can be used if m5.music.126.net does not
# work, especially for users outside of Mainland China
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
song_url = host + song_file_path

bitrate = int_or_none(details.get('bitrate')) or 999000
data = self._call_player_api(song_id, bitrate)
for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'ext': details.get('extension'),
'abr': float_or_none(details.get('bitrate'), scale=1000),
'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format,
'filesize': details.get('size'),
'asr': details.get('sr')
'filesize': int_or_none(song.get('size')),
'asr': int_or_none(details.get('sr')),
})
break
return formats

@classmethod
Expand All @@ -79,30 +143,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood (feat. Kendrick Lamar)',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150517',
'timestamp': 1431878400,
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics translation.',
'url': 'http://music.163.com/#/song?id=29822014',
'info_dict': {
'id': '29822014',
'ext': 'mp3',
'title': '听见下雨的声音',
'creator': '周杰伦',
'upload_date': '20141225',
'timestamp': 1419523200,
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
Expand All @@ -112,9 +162,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
Expand All @@ -128,7 +178,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
'skip': 'Blocked outside Mainland China',
}]

def _process_lyrics(self, lyrics_info):
Expand Down
Loading

0 comments on commit 75903c9

Please sign in to comment.