-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathimport_pubmed.py
executable file
·35 lines (29 loc) · 1.36 KB
/
import_pubmed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# this script was written by Jack Stevenson in 2022
# input: CSV of publications downloaded from Pubmed
# (recommended search: author "Shokat K")
# output: body text of Shokat lab website publications page
# note 1: after updating the pubs.md content, you still need to upload the pdfs:
# name each pdf by its PMID and put it in the "/pdfs" folder
# note 2: I have made some minor corrections to the Pubmed download by hand
# when updating in future, I recommend just running this script on new pubs
# then just add the new pubs to the top of pubs.md
# good luck!
import pandas as pd
from sys import argv
# filename is supplied as a command-line argument
# e.g. "csv-ShokatKAut-set.csv"
# exit with a short usage message (instead of an IndexError traceback)
# when the argument is missing
if len(argv) < 2:
    raise SystemExit(f"usage: {argv[0]} PUBMED_CSV")
filename = argv[1]
def format(df):
    """Return the pubs.md Markdown entry for one publication.

    df is one row of the publications table; its Authors, Title, Citation
    and PMID fields are read by attribute access. The result is a bullet
    line with the reference text, a blank line, and then a line linking to
    the PubMed record and to the PDF at /pdfs/<PMID>.pdf.

    A period is appended to the title only when the title does not already
    end in ".", "?" or "!", so a title such as "Is X druggable?" is not
    rendered as "Is X druggable?.".
    """
    # str() keeps non-string values (e.g. a missing cell) formatting the
    # same way the f-string interpolation would
    title = str(df.Title)
    # avoid doubled terminal punctuation ("?." or "..")
    if not title.endswith((".", "?", "!")):
        title += "."
    return (f"* {df.Authors} {title} {df.Citation}\n\n"
            f" ([PMID {df.PMID}]"
            f"(https://www.ncbi.nlm.nih.gov/pubmed/{df.PMID})) ([PDF]"
            f"({{{{ site.baseurl }}}}/pdfs/{df.PMID}.pdf))\n")
# read in data from the downloaded CSV file named on the command line;
# dtype=str reads every column as text
pubs = pd.read_csv(filename, dtype=str)
# create a new dataframe column of the text output for each publication
pubs["formatted"] = pubs.apply(format, axis=1)
# concatenate all the outputs together to make one big chunk of body text
output = pubs.formatted.str.cat(sep="\n")
# write the output to a file for later copying into pubs.md
# (explicit UTF-8 so any non-ASCII characters are written the same way on
# every platform instead of depending on the locale's default encoding)
with open("pubs_body.txt", 'w', encoding="utf-8") as f:
    f.write(output)