-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathDeGruyterbook
executable file
·74 lines (60 loc) · 2.4 KB
/
DeGruyterbook
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
# DeGruyterbook - download every chapter PDF of a De Gruyter online book and
# merge them into a single PDF at /tmp/<identifier>/<identifier>.pdf.
#
# Call this script like
#   DeGruyterbook http://www.degruyter.com/viewbooktoc/product/119402
#
# External tools used: wget, grep, awk, perl, qpdf.
# Exit status: 0 on success (and for --help), 1 on any error.

# Name the script was invoked as; used as the prefix of status messages.
prog=${0##*/}

# Print the usage text to stdout.
usage() {
  echo "Usage: $prog URL-to-DeGruyter-content"
  echo "Example: $prog http://www.degruyter.com/viewbooktoc/product/119402"
}

# Print an error message, prefixed with the script name, to stderr.
# Arguments: the message words.
warn() {
  printf '%s: %s\n' "$prog" "$*" >&2
}

# Print everything after the last "/" of a URL; this is the book identifier.
# Prints an empty line when the URL ends in "/".
# Arguments: $1 - URL of the online resource
url_identifier() {
  printf '%s\n' "${1##*/}"
}

# Print the ordered list of chapter PDF download URLs found in an HTML file.
# Arguments: $1 - file holding the HTML source of the table of contents
#            $2 - book identifier, appended as the "t:ac=product/" parameter
chapter_pdf_urls() {
  # [^"]* keeps each match inside one attribute value, so several links on
  # the same line are all found instead of being swallowed by a greedy match.
  grep -o '/view/books/[^"]*\.xml">' "$1" |
    awk -v id="$2" '{
      sub(/".*$/, "")          # drop the closing quote and what follows it
      split($0, part, "/")     # part[4] = book, part[5] = chapter
      print "http://www.degruyter.com/dg/viewbooktoc.chapterlist.resultlinks.fullcontentlink:pdfeventlink/$002fbooks$002f" part[4] "$002f" part[5] "$002f" part[5] ".pdf?t:ac=product/" id
    }'
}

# Download all chapters into the current directory and merge them into
# <identifier>.pdf. Must be run from inside the (empty) working directory.
# Arguments: $1 - book identifier
#            $2 - URL of the online resource
# Returns:   0 on success, 1 on any failure
build_book() {
  local book_id=$1 url=$2
  local saved count=0 rv=0
  local -a pages=()

  # Get the source
  echo "$prog: Getting the HTML source"
  if ! wget --user-agent="Mozilla/5.0" "$url" -O source; then
    warn "Failed to download $url"
    return 1
  fi

  # Parse the source to retrieve an ordered list of download URLs
  chapter_pdf_urls source "$book_id" > list2
  if [[ ! -s list2 ]]; then
    warn "No chapter links found in the HTML source."
    return 1
  fi

  # Retrieve the individual files from de Gruyter. wget's exit status is
  # not checked here; its log file is inspected below instead.
  echo "$prog: Getting the individual chapter files"
  wget --user-agent="Mozilla/5.0" -i list2 -o logfile

  # If resource was secured, print an error message and give up
  if grep --silent resource-secured logfile; then
    warn "Resource seems to be secured."
    return 1
  fi

  # Extract the names wget saved the files under from its log.
  # See http://superuser.com/questions/270256/how-to-make-grep-p-foo-b-r-display-only-whats-in-the-brackets-do-i-need
  perl -ne '/^Saving to: (‘|\`|“)(.+?)(’|'\''|”)$/ and print "$2\n";' < logfile > download_list

  # qpdf is confused by the name of the downloaded files.
  # Therefore, we rename them to 1.pdf, 2.pdf, ... in download order.
  while IFS= read -r saved; do
    count=$((count + 1))
    if ! mv -- "$saved" "$count.pdf"; then
      warn "Failed to rename downloaded file $saved"
      return 1
    fi
    pages+=("$count.pdf")
  done < download_list
  if ((count == 0)); then
    warn "No chapter files were downloaded."
    return 1
  fi

  # Concatenate the pdf files to a book
  echo "$prog: Concatenating files"
  qpdf --empty "$book_id.pdf" --pages "${pages[@]}" -- || rv=$?
  # qpdf documents exit status 3 as "warnings only"; the output is still
  # written in that case, so only other non-zero statuses are failures.
  if ((rv != 0 && rv != 3)); then
    warn "qpdf failed with exit status $rv."
    return 1
  fi
  return 0
}

# Entry point: validate the arguments, prepare /tmp/<identifier> and build
# the book there.
# Arguments: $1 - URL of the online resource, or --help
# Returns:   0 on success or --help, 1 on usage error or failure
main() {
  local url book_id workdir

  # Parse the arguments
  if [[ $# -ne 1 ]]; then
    usage >&2
    return 1
  fi
  if [[ $1 == "--help" ]]; then
    usage
    return 0
  fi

  # First (and only) command line argument is the URL of the online resource
  url=$1
  # Use everything after the last "/" as an identifier. "." and ".." are
  # rejected as well because the identifier becomes a directory name that
  # is removed recursively below.
  book_id=$(url_identifier "$url")
  case "$book_id" in
    '' | . | ..)
      warn "Failed to parse URL."
      return 1
      ;;
  esac
  echo "Looking up $url (identifier $book_id)"

  # Make a fresh working directory. The location is fixed because the
  # result is reported to the user under this path.
  workdir="/tmp/$book_id"
  echo "Making directory $workdir"
  rm -rf -- "$workdir"
  if ! mkdir -- "$workdir"; then
    warn "Failed to create directory $workdir"
    return 1
  fi

  # Work inside a subshell so the caller's current directory is untouched
  # on every exit path.
  (cd -- "$workdir" && build_book "$book_id" "$url") || return 1

  # Inform the user
  echo "The book has been compiled and now resides in $workdir/$book_id.pdf"
}

# Last command of the script: its return value is the script's exit status.
main "$@"