From 1641fff9d7c5d38af2fa297f09e4b379d8364b4f Mon Sep 17 00:00:00 2001 From: Kentaro Wada Date: Tue, 16 Jan 2024 22:14:44 +0900 Subject: [PATCH] Set default user-agent with giving option to override it --- gdown/cli.py | 6 ++++++ gdown/download.py | 17 ++++++++++++----- gdown/download_folder.py | 10 +++++++++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/gdown/cli.py b/gdown/cli.py index bf80deaa..76171b9e 100644 --- a/gdown/cli.py +++ b/gdown/cli.py @@ -121,6 +121,10 @@ def main(): help="Format of Google Docs, Spreadsheets and Slides. " "Default is Google Docs: 'docx', Spreadsheet: 'xlsx', Slides: 'pptx'.", ) + parser.add_argument( + "--user-agent", + help="User-Agent to use for downloading file.", + ) args = parser.parse_args() @@ -159,6 +163,7 @@ def main(): use_cookies=not args.no_cookies, verify=not args.no_check_certificate, remaining_ok=args.remaining_ok, + user_agent=args.user_agent, ) else: download( @@ -173,6 +178,7 @@ def main(): fuzzy=args.fuzzy, resume=args.continue_, format=args.format, + user_agent=args.user_agent, ) except FileURLRetrievalError as e: print(e, file=sys.stderr) diff --git a/gdown/download.py b/gdown/download.py index bcc37675..e9b376d2 100644 --- a/gdown/download.py +++ b/gdown/download.py @@ -54,12 +54,10 @@ def get_url_from_gdrive_confirmation(contents): return url -def _get_session(proxy, use_cookies, return_cookies_file=False): +def _get_session(proxy, use_cookies, user_agent, return_cookies_file=False): sess = requests.session() - sess.headers.update( - {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)"} - ) + sess.headers.update({"User-Agent": user_agent}) if proxy is not None: sess.proxies = {"http": proxy, "https": proxy} @@ -91,6 +89,7 @@ def download( fuzzy=False, resume=False, format=None, + user_agent=None, ): """Download file from URL. @@ -124,6 +123,8 @@ def download( - Google Docs: 'docx' - Google Spreadsheet: 'xlsx' - Google Slides: 'pptx' + user_agent: str, optional + User-agent to use in the HTTP request. Returns ------- @@ -134,11 +135,17 @@ def download( raise ValueError("Either url or id has to be specified") if id is not None: url = "https://drive.google.com/uc?id={id}".format(id=id) + if user_agent is None: + # We need to use different user agent for file download c.f., folder + user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36" # NOQA: E501 url_origin = url sess, cookies_file = _get_session( - proxy=proxy, use_cookies=use_cookies, return_cookies_file=True + proxy=proxy, + use_cookies=use_cookies, + user_agent=user_agent, + return_cookies_file=True, ) gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy) diff --git a/gdown/download_folder.py b/gdown/download_folder.py index 3153b7ec..81bee678 100644 --- a/gdown/download_folder.py +++ b/gdown/download_folder.py @@ -203,6 +203,7 @@ def download_folder( use_cookies=True, remaining_ok=False, verify=True, + user_agent=None, ): """Downloads entire folder from URL. @@ -228,6 +229,8 @@ def download_folder( Either a bool, in which case it controls whether the server's TLS certificate is verified, or a string, in which case it must be a path to a CA bundle to use. Default is True. + user_agent: str, optional + User-agent to use in the HTTP request. Returns ------- @@ -245,8 +248,13 @@ def download_folder( raise ValueError("Either url or id has to be specified") if id is not None: url = "https://drive.google.com/drive/folders/{id}".format(id=id) + if user_agent is None: + # We need to use different user agent for folder download c.f., file + user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36" # NOQA: E501 - sess = _get_session(proxy=proxy, use_cookies=use_cookies) + sess = _get_session( + proxy=proxy, use_cookies=use_cookies, user_agent=user_agent + ) if not quiet: print("Retrieving folder contents", file=sys.stderr)