-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnospps.ksh
93 lines (79 loc) · 2.65 KB
/
nospps.ksh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/bash
# space apps challenge: syncing nasa open source projects
# 2013-04-20
# s. henry herold (henry.herold@comcast.net)
# Brandon Joyce
# Marc Phillips
# Pawel Grzegrzolka
# usage: ./nospps.ksh
# description: this program scrapes the project URLs from code.nasa.gov and follows those links to download the latest release files of each project.
# Phase 1: crawl the paginated project listing and collect, line by line,
# every project title (nospps-project.title) and every recognised repository
# URL (nospps-project.url). The download phase pairs the two files by line
# number.

# Base URL of the project listing; pages are fetched as ${site}page/<n>.
site="http://code.nasa.gov/project/"

# Start every run from empty result files. They are only ever appended to
# below and are removed at the very end of the script, so an interrupted
# earlier run would otherwise leave stale lines behind and shift the
# title/URL pairing.
: > nospps-project.title
: > nospps-project.url

# Fetch page 1, 2, 3, ... until wget reports a failure, which is taken to
# mean "no such page".
# NOTE(review): if the server ever answers successfully for every page
# number this loop will not terminate -- confirm against the live site.
control=0; i=1
while [ "$control" -eq 0 ]
do
  if wget -q "${site}page/${i}" -O "nospps-list.${i}"
  then
    # Titles: lines of the form <h1><a ...>Title</a></h1>. Turning every '>'
    # into '<' leaves a single delimiter, making the link text field 5.
    grep '<h1><a.*/a></h1>' "nospps-list.${i}" \
      | tr '<>' '<<' \
      | awk -F '<' '{print $5}' >> nospps-project.title

    # Repository links: any line mentioning a known hosting site; the URL is
    # taken to be the first double-quoted string on that line. Dots are
    # escaped so they match literally.
    grep -E '(opensource\.gsfc\.nasa\.gov|github\.com|svn\.apache\.org|sourceforge\.net|gmat\.gsfc\.nasa\.gov|directreadout\.sci\.gsfc\.nasa\.gov)' "nospps-list.${i}" \
      | awk -F '"' '{print $2}' >> nospps-project.url

    i=$((i + 1))
  else
    # The failed fetch still leaves an (empty) -O file behind; remove it
    # together with all previously downloaded listing pages.
    rm -f nospps-list.*
    control=1
  fi
done
# Phase 2: for every scraped project title create a directory named after it
# and download the project's release files into that directory. The URL for
# title line N is line N of nospps-project.url.

#######################################
# Turn a project title into a directory name: runs of whitespace collapse to
# one blank, then each of ' / ( ) ,' becomes '-'.
# Arguments: $1 - raw title line
# Outputs:   directory name on stdout (empty for a blank title)
#######################################
sanitize_title() {
  local -a words=()
  # Word-split without pathname expansion, so a title containing '*' or '?'
  # is kept literally instead of being expanded against the current dir.
  read -r -a words <<<"$1"
  printf '%s\n' "${words[*]}" | tr ' /(),' '-----'
}

#######################################
# Reduce a project URL to its directory form: a trailing .htm/.php/.html/.cgi
# page name is dropped and the result always ends in '/'.
# Arguments: $1 - URL
# Outputs:   normalised URL on stdout
#######################################
normalize_url() {
  local url=$1
  case "$url" in
    *.htm | *.php | *.html | *.cgi) url=${url%/*} ;;
  esac
  case "$url" in
    */) ;;
    *) url=$url/ ;;
  esac
  printf '%s\n' "$url"
}

#######################################
# List the archive files (.tar/.gz/.zip/.bz2) referenced in double-quoted
# strings of an HTML file, one per line.
# Arguments: $1 - path to the HTML file
# Outputs:   one href per line on stdout
#######################################
list_archive_links() {
  # [^"]* instead of .* keeps each match inside one pair of quotes, so
  # several links on the same line are all reported.
  grep -E -o '"[^"]*\.(tar|gz|zip|bz2)"' -- "$1" | awk -F '"' '{print $2}'
}

#######################################
# Resolve an href found on a page against the page's directory URL.
# Arguments: $1 - base URL (directory form, ending in '/')
#            $2 - href: absolute, protocol-relative, root-relative or relative
# Outputs:   absolute URL on stdout
#######################################
resolve_link() {
  local base=$1 item=$2 scheme=http origin
  case "$base" in
    *://*)
      scheme=${base%%://*}
      origin=${base#*://}
      origin=${scheme}://${origin%%/*}
      ;;
    *)
      origin=${base%%/*}
      ;;
  esac
  case "$item" in
    http://* | https://* | ftp://*) printf '%s\n' "$item" ;;
    //*) printf '%s:%s\n' "$scheme" "$item" ;;
    /*) printf '%s%s\n' "$origin" "$item" ;;
    *) printf '%s/%s\n' "${base%/*}" "$item" ;;
  esac
}

#######################################
# Map any SourceForge URL to the project's summary page.
#   .../projects/foo/files/...  -> .../projects/foo/
#   <scheme>://foo.sourceforge.net/ -> http://sourceforge.net/projects/foo/
# Arguments: $1 - SourceForge URL (any scheme, or none)
# Outputs:   project page URL on stdout
#######################################
sourceforge_project_url() {
  local url=$1 host
  case "$url" in
    */files/*) url=${url%%/files/*}/ ;;
  esac
  # Strip the scheme whatever its length, then keep the host part only.
  host=${url#*://}
  host=${host%%/*}
  case "$host" in
    sourceforge.net | www.sourceforge.net) ;;
    *) url=http://sourceforge.net/projects/${host%%.*}/ ;;
  esac
  printf '%s\n' "$url"
}

#######################################
# List the download URLs advertised as "File released:" on a SourceForge
# project page, with the "/download" part removed and .webloc files skipped.
# Arguments: $1 - path to the saved project page
# Outputs:   one URL per line on stdout
#######################################
list_sourceforge_links() {
  awk -F '"' '/File released:/ {print $2}' "$1" \
    | sed 's|/download||' \
    | grep -v '\.webloc$'
}

#######################################
# Download one project's files into the current directory.
# Arguments: $1 - sanitised title (used for the saved page name)
#            $2 - normalised project URL
#            $3 - 1-based project index (used for the url.<n> marker file)
# Outputs:   a marker line on stdout for every download that fails
#######################################
fetch_project() {
  local title=$1 url=$2 index=$3 item link
  local -a sources=()
  case "$url" in
    *opensource.gsfc.nasa.gov*) ########## nasa's open source site scraper
      wget -q "$url" -O "$title.html"
      list_archive_links "$title.html" | while IFS= read -r item; do
        link=$(resolve_link "$url" "$item")
        if ! wget -N "$link" </dev/null; then
          echo; echo "----------------- title = $title :: item = $item"; echo
        fi
      done
      ;;
    *sourceforge.net*) ########## nasa's sourceforge project scraper
      url=$(sourceforge_project_url "$url")
      wget -q "$url" -O "$title.html"
      while IFS= read -r item; do
        [ -n "$item" ] && sources+=("$item")
      done < <(list_sourceforge_links "$title.html")
      # No link found counts as a failure too.
      if [ "${#sources[@]}" -eq 0 ] || ! wget -N "${sources[@]}"; then
        echo; echo "----------------- title = $title :: source = ${sources[*]}"; echo
      fi
      ;;
    *) ########## unknown repo type -- dont know how to scrape
      printf '%s\n' "$url" > "url.$index"
      ;;
  esac
}

i=1
if [ -r nospps-project.title ]; then
  # The list is read on fd 3 so nothing inside the loop can consume it.
  while IFS= read -r line <&3 || [ -n "$line" ]; do
    title=$(sanitize_title "$line")
    url=$(normalize_url "$(sed -n "${i}p" nospps-project.url)")
    if [ -z "$title" ]; then
      # A blank title would make a bare "cd" jump to $HOME; skip it but keep
      # counting so later titles stay paired with their URLs.
      printf '%s\n' "nospps: skipping entry $i: empty title" >&2
    elif mkdir -p -- "$title"; then
      # The subshell confines the directory change to this one project.
      (cd -- "$title" && fetch_project "$title" "$url" "$i")
    else
      printf '%s\n' "nospps: cannot create directory for entry $i: $title" >&2
    fi
    i=$((i + 1))
  done 3< nospps-project.title
else
  printf '%s\n' "nospps: nospps-project.title not found; nothing to download" >&2
fi
rm -f nospps-project.title nospps-project.url