-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathpubmed.pl
55 lines (40 loc) · 1.18 KB
/
pubmed.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/perl -w
# Fetch PubMed abstracts for a search term through the NCBI E-utilities and
# write them to abstracts.txt in the current directory.
#
# Flow: ESearch returns the matching record IDs as XML; each ID is then
# handed to EFetch, and the plain-text abstract it returns is written to
# the output file. Each ID is also echoed to STDOUT as a progress indicator.
use strict;
use warnings;
use LWP::Simple;

# Search term to find; whitespace becomes '+' so it can sit in a query string.
my $search_term = "hiv entry";
$search_term =~ s/\s/+/g;

# Maximum number of results to retrieve.
my $retmax = 10;

# Base URL of the E-utilities.
my $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';

# Database used for BOTH the search and the fetch. The two calls must agree:
# IDs returned by a search of one Entrez database (e.g. pmc) are not valid
# IDs in another (e.g. pubmed), so mixing them fetches unrelated records.
my $db_name = 'pubmed';

# File that receives the abstracts.
my $outfile = 'abstracts.txt';

# Build the query URL and submit the search; the result is XML.
my $url = $base . "esearch.fcgi?db=$db_name&retmax=$retmax&term=$search_term";
my $esearch_result = get($url);

# LWP::Simple::get returns undef on any failure.
die "ESearch request failed: $url\n" unless defined $esearch_result;

# Extract the record IDs. The pattern is not anchored with greedy '.*' so
# that several <Id> elements on one line are all captured, and only digits
# are accepted because the ID is interpolated into the fetch URL below.
my @ids = $esearch_result =~ m{<Id>(\d+)</Id>}g;

# Remove any output left over from a previous run.
if (-e $outfile) {
    unlink $outfile or die "Cannot remove old $outfile: $!\n";
}

if (!@ids) {
    warn "No results found for search term '$search_term'\n";
    exit 0;
}

# Open the output once for the whole run instead of once per record.
open my $out, '>', $outfile or die "Cannot open $outfile for writing: $!\n";

for my $id (@ids) {
    print "$id\n";

    # Get the abstract for this ID as plain text.
    my $fetchurl = $base
        . "efetch.fcgi?db=$db_name&id=$id&retmode=text&rettype=abstract";
    my $abstract = get($fetchurl);

    if (defined $abstract) {
        print {$out} $abstract;
    }
    else {
        # Keep going: one failed record should not discard the others.
        warn "EFetch request failed for id $id; skipping\n";
    }

    # Pause between requests to stay within NCBI's request-rate guidance.
    sleep 1;
}

# Buffered write errors only surface at close, so check it.
close $out or die "Cannot close $outfile: $!\n";