Skip to content

Commit

Permalink
Don't use requests session with multithreading
Browse files: browse the repository at this point in the history
  • Loading branch information
nemobis committed Jan 2, 2024
1 parent c784a67 commit 8b0776f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/oabot/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -263,7 +263,7 @@ def keep_existing_url(self, url):
return True

try:
r = SESSION.head(url, timeout=5, allow_redirects=True)
r = requests.head(url, timeout=(5, 1), allow_redirects=True, headers={'User-Agent': OABOT_USER_AGENT})
except requests.exceptions.RequestException:
r = None
# Avoid changing an URL which already clearly points to an open PDF
Expand Down Expand Up @@ -418,7 +418,7 @@ def get_oa_link(paper, doi=None, only_unpaywall=True):
for url in sort_links(candidate_urls):
if url:
try:
head = SESSION.head(url, timeout=10)
head = requests.head(url, timeout=(5, 1), allow_redirects=True, headers={'User-Agent': OABOT_USER_AGENT})
head.raise_for_status()
if head.status_code < 400 and 'Location' in head.headers and urllib.parse.urlparse(head.headers['Location']).path == '/':
# Redirects to main page: fake status code, should be not found
Expand Down
2 changes: 1 addition & 1 deletion src/oabot/settings.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@
# Mount point is '/'
OABOT_APP_MOUNT_POINT = ''

OABOT_USER_AGENT = 'OAbot/1.0 (+http://enwp.org/WP:OABOT)'
OABOT_USER_AGENT = 'OAbot/1.0 (+http://enwp.org/WP:OABOT) not Googlebot'

# the bot will not make any changes to these templates
excluded_templates = ['cite arxiv', 'cite web', 'cite news', 'cite book']
Expand Down

0 comments on commit 8b0776f

Please sign in to comment.