Skip to content

Commit

Permalink
feat: Add Zotero plugin and remove restrictions on number of articles…
Browse files Browse the repository at this point in the history
… required to make recommendations using S2 (#53)

ref task VirtualPatientEngine/VPECompetitiveIntelligence#3 (comment)

* fix: #47

* feat: do not consider an article negative if it is already positive for a topic

* Feat/write recommended articles (#51)

* update data

* update data

* feat: add Zotero plugin

* chore: update README explaining the Zotero plugin

* chore: utils.py and reduce the number of articles to 200

---------

Co-authored-by: GitHub Action <action@github.com>

---------

Co-authored-by: AW Mulyadi <50047143+awmulyadi@users.noreply.github.com>
Co-authored-by: GitHub Action <action@github.com>

BREAKING CHANGE: Zotero + S2 updates
  • Loading branch information
gurdeep330 authored Aug 23, 2024
1 parent da39c8a commit de94c38
Show file tree
Hide file tree
Showing 81 changed files with 76 additions and 19,984 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<h1 align="center" style="border-bottom: none;">🚀 Literature Survey</h1>

Welcome to Team VPE's Literature Survey Template Repository! 📚✨ This repository provides you with a quick setup to create your very own automated literature survey website using Semantic Scholar's [Recommendation API](https://api.semanticscholar.org/api-docs/recommendations).
Welcome to Team VPE's Literature Survey Template Repository! 📚✨ This repository provides you with a quick setup to create your very own automated literature survey website using Semantic Scholar's [Recommendation API](https://api.semanticscholar.org/api-docs/recommendations), and provides an option to import the recommended articles to your [Zotero](https://www.zotero.org/) account.

### Types of Recommendations
Semantic Scholar provides 2 types of recommendations:
Expand Down Expand Up @@ -66,6 +66,16 @@ Head over to the localhost link that pops up in your terminal.

12. (Optional) Edit custom.css if you'd like to change the styling of web pages.

### Zotero Plugin
If you'd like to read the recommended articles in your Zotero Account:
1. Create an account with Zotero
2. Under the `Settings` tab in your GitHub repo, click on `Secrets and variables`, and select `Actions`
3. Set the following `Repository secrets`:
- `ZOTERO_API_KEY` as Zotero API key (you can get it [here](https://www.zotero.org/settings/keys/new))
- `LIBRARY_ID` as your group ID (this can be found by opening the group's page: https://www.zotero.org/groups/groupname , and hovering over the group settings link. The ID is the integer after /groups/)
- `TEST_COLLECTION_KEY` as your collection's key (enter `https://api.zotero.org/groups/<LIBRARY_ID>/collections?key=<ZOTERO_API_KEY>` in your browser to view all the collections in your group; choose the key of the collection in which you'd like the recommended articles to be stored)
4. The changes take effect only when the code is re-run. Either wait for the next scheduled run (Mondays), or trigger a run manually: under the `Actions` tab, select `mkdocs-deploy` from the left panel, and click on `Run workflow`.

### Bugs? Feature Requests?
If you encounter any bugs or have brilliant ideas for new features, please head over to the [Issues](https://github.com/VirtualPatientEngine/literatureSurvey/issues) and let us know.

Expand Down
4 changes: 3 additions & 1 deletion app/code/literature_fetch_recommendation_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def create_template(template,
if len(topic_obj.paper_ids['positive']) == 0:
print (f'No positive articles for {topic_obj.topic}. Skipping...')
else:
search_response_json = utils.add_recommendations(topic_obj, limit=300)
search_response_json = utils.add_recommendations(topic_obj, limit=200)
for paper_data in search_response_json['recommendedPapers']:
paper_id = paper_data['paperId']
# skip the ones with publication date is null
Expand Down Expand Up @@ -208,6 +208,8 @@ def create_template(template,
for article_id, article_obj in topic_obj.paper_ids[article_type].items():
utils.update_h_index(article_obj, author_details)
print (f'Fetched the details of the authors (n={len(author_details)}) for {topic}.')
# Add the recommended articles to Zotero
utils.add_recommended_articles_to_zotero(topic, topic_obj.paper_ids)
# Create the markdown text
markdown_text = create_template(
("../../templates", "topic.txt"),
Expand Down
83 changes: 60 additions & 23 deletions app/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,81 @@
script to define utility functions
'''

import os
import sys
import re
import matplotlib.pyplot as plt
import pandas as pd
import yaml
import requests
from pyzotero import zotero

FIELDS = 'paperId,url,authors,journal,title,'
FIELDS += 'publicationTypes,publicationDate,citationCount,'
FIELDS += 'publicationVenue,externalIds,abstract'

def add_negative_articles(topic_obj, dic, max_num_articles=10):
LIBRARY_TYPE = 'group'
LIBRARY_ID = os.environ.get('LIBRARY_ID')
ZOTERO_API_KEY = os.environ.get('ZOTERO_API_KEY')
TEST_COLLECTION_KEY = os.environ.get('TEST_COLLECTION_KEY')

def add_recommended_articles_to_zotero(topic_name, paper_ids):
    """
    Upload the recommended articles of a topic to a Zotero collection.

    Every entry of ``paper_ids['recommended']`` is turned into a Zotero
    'journalArticle' item, tagged with ``topic_name`` and assigned to the
    collection identified by TEST_COLLECTION_KEY.

    If LIBRARY_ID, ZOTERO_API_KEY or TEST_COLLECTION_KEY is not set, a
    message is printed and nothing is uploaded.

    Args:
        topic_name: name of the topic; used as the tag of each item.
        paper_ids: mapping that holds, under the key 'recommended', a
            mapping of paper id to paper object. Each paper object is
            read through its ``info`` and ``authors`` attributes.
    """
    required_settings = (LIBRARY_ID, ZOTERO_API_KEY, TEST_COLLECTION_KEY)
    if any(setting is None for setting in required_settings):
        print('Zotero credentials not found.')
        return

    print('Adding recommended articles to Zotero.')
    # Client for the configured Zotero library
    zot = zotero.Zotero(LIBRARY_ID, LIBRARY_TYPE, ZOTERO_API_KEY)

    new_items = []
    for paper_obj in paper_ids['recommended'].values():
        info = paper_obj.info
        # Start from a fresh item template for every paper
        item = zot.item_template('journalArticle')
        item['title'] = info.title
        item['creators'] = [
            {'creatorType': 'author', 'name': author.author_name}
            for author in paper_obj.authors
        ]
        item['publicationTitle'] = info.journal
        item['date'] = info.publication_date
        item['abstractNote'] = info.abstract
        item['url'] = info.url
        # The topic name doubles as the tag of the item
        item['tags'] = [{'tag': topic_name}]
        # File the item under the configured collection
        item['collections'] = [TEST_COLLECTION_KEY]
        new_items.append(item)

    # Validate and upload the items in batches of 50
    batch_size = 50
    for start in range(0, len(new_items), batch_size):
        batch = new_items[start:start + batch_size]
        zot.check_items(batch)
        zot.create_items(batch)

def add_negative_articles(topic_obj, dic):
"""
Add the negative articles to the topic object
"""
if 'negative' not in topic_obj.paper_ids:
topic_obj.paper_ids['negative'] = {}
num_topics = len(dic) - 1
while len(topic_obj.paper_ids["negative"]) < max_num_articles:
for topic in dic:
if topic == topic_obj.topic:
for topic in dic:
# Skip the current topic
if topic == topic_obj.topic:
continue
# Add the negative articles to the topic object
for paper_id in dic[topic].paper_ids['positive']:
# Skip if the paper id is already in the negative articles
if paper_id in topic_obj.paper_ids['negative']:
continue
articles_per_topic = max_num_articles // num_topics
for paper_id in dic[topic].paper_ids['positive']:
if paper_id in topic_obj.paper_ids['negative']:
continue
topic_obj.paper_ids['negative'][paper_id]=dic[topic].paper_ids['positive'][paper_id]
articles_per_topic -= 1
if articles_per_topic == 0:
break
if len(topic_obj.paper_ids["negative"]) == max_num_articles:
break
if len(topic_obj.paper_ids["negative"]) == max_num_articles:
break
# Skip if the paper id is already in the positive articles
# i.e. do not add the same paper id to both positive and negative articles
if paper_id in topic_obj.paper_ids['positive']:
continue
# Skip the paper if it is marked as not to be used for recommendation
paper_obj = dic[topic].paper_ids['positive'][paper_id]
if paper_obj.use_for_recommendation is False:
continue
topic_obj.paper_ids['negative'][paper_id]=dic[topic].paper_ids['positive'][paper_id]
print (f'Added {len(topic_obj.paper_ids["negative"])} negative articles for {topic_obj.topic}.')

def update_paper_details(topic_obj):
Expand Down Expand Up @@ -178,18 +219,14 @@ def add_recommendations(topic_obj,
params = {'fields': fields, 'limit': limit}
# Select positive articles that have use_for_recommendation set to True
positive_paper_ids = []
count = 0
# count = 0
for paper_id, paper_obj in topic_obj.paper_ids['positive'].items():
if paper_obj.use_for_recommendation is False:
continue
positive_paper_ids.append(paper_id)
count += 1
if count == 10:
break
json = {
# 'positivePaperIds': list(topic_obj.paper_ids['positive'].keys())[:10],
'positivePaperIds': positive_paper_ids,
'negativePaperIds': list(topic_obj.paper_ids['negative'].keys())[:10],
'negativePaperIds': list(topic_obj.paper_ids['negative'].keys()),
}
status_code = 0
while status_code not in [200, 400, 404]:
Expand Down
Loading

0 comments on commit de94c38

Please sign in to comment.