forked from graeme/MapMyPlaylist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDBPediaScanner.py
114 lines (95 loc) · 3.59 KB
/
DBPediaScanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import rdflib
import re
class DBPediaScanner:
    """Look up an artist's location on DBpedia through rdflib.

    On construction the artist's DBpedia resource is parsed into a graph,
    disambiguation links pointing at a band/singer/group/musician page are
    followed, and the first ``hometown`` or ``birthPlace`` object found is
    stored as ``self.locationURI``. When no such object exists,
    ``locationURI`` is left unset and the accessor methods return ``None``.

    Progress and failure messages are printed to stdout.
    """

    DBONTO = rdflib.Namespace('http://dbpedia.org/ontology/')  # Ontology rdflib Namespace
    ONTOLOGIES = (DBONTO.hometown, DBONTO.birthPlace)  # Predicates checked, in order, for location info
    labelPredicate = rdflib.term.URIRef(u'http://www.w3.org/2000/01/rdf-schema#label')
    latPredicate = rdflib.URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#lat")
    longPredicate = rdflib.URIRef("http://www.w3.org/2003/01/geo/wgs84_pos#long")
    # Class-level fallback only; __init__ gives every instance its own graph
    # so that location triples of different artists are never mixed.
    locationGraph = rdflib.Graph()
    # Substrings of a disambiguation target URI that mark it as a music act.
    _DISAMBIGUATION_MARKERS = ('(band)', '(singer)', '(group)', '(musician)')

    def __init__(self, artist):
        """Build the scanner for ``artist`` (the artist's name as a string).

        Runs of whitespace in the name are replaced by underscores to form
        the DBpedia resource URI, which is then parsed (a network fetch).
        """
        self.artistURI = rdflib.URIRef(
            "http://dbpedia.org/resource/" + re.sub(r'\s+', '_', artist))
        print("Artist URI is: %s" % self.artistURI)

        # Per-instance location graph plus a flag so it is parsed only once.
        self.locationGraph = rdflib.Graph()
        self._locationLoaded = False

        self.artistGraph = rdflib.Graph()
        self.artistGraph.parse(self.artistURI)
        self.__checkDisambiguates()

        for ontology in self.ONTOLOGIES:
            # Take the first object of this predicate, if there is one.
            location = next(self.artistGraph.objects(self.artistURI, ontology), None)
            if location is not None:
                self.locationURI = location
                print("Ontology of type %s found!" % ontology)
                return
            print("No ontology of type %s found!" % ontology)
        # locationURI is deliberately left unset when nothing was found.
        print("No locational info!")

    def artistLocationURI(self):
        """Return the artist's location URI as a string, or None if unknown."""
        location = getattr(self, 'locationURI', None)
        if location is None:
            print("LocationURI not defined!")
            return None
        return str(location)

    def artistLocationLabel(self):
        """Return the location's label as a string, preferring English.

        Falls back to the first rdfs:label of any language. Returns None
        when the location is unknown or carries no label at all.
        """
        if not self._loadLocationGraph():
            return None
        try:
            # NOTE(review): preferredLabel is reportedly deprecated in newer
            # rdflib releases - confirm against the rdflib version in use.
            hometown = self.locationGraph.preferredLabel(self.locationURI, lang=u'en')[0][1]
        except IndexError:  # No label in English
            print("No English label found!")
        else:
            print("English label found!")
            print("Hometown is %s" % hometown)
            return str(hometown)

        hometown = next(
            self.locationGraph.objects(self.locationURI, self.labelPredicate), None)
        if hometown is None:
            print("Empty locationGraph!")
            return None
        return str(hometown)

    def artistLocationGeo(self):
        """Return the location's ``(latitude, longitude)`` as floats.

        Returns None when the location is unknown or has no geodata. The
        location resource is parsed on demand, so this method does not
        depend on artistLocationLabel() having been called first.
        """
        if not self._loadLocationGraph():
            return None
        latNode = next(
            self.locationGraph.objects(self.locationURI, self.latPredicate), None)
        lonNode = next(
            self.locationGraph.objects(self.locationURI, self.longPredicate), None)
        if latNode is None or lonNode is None:
            print("No geodata!")
            return None
        lat = float(latNode)
        lon = float(lonNode)
        print("Latitude is %s" % lat)
        print("Longitude is %s" % lon)
        return lat, lon

    def _loadLocationGraph(self):
        """Parse the location resource into locationGraph at most once.

        Returns True when a location URI is known (and its graph is loaded),
        False after printing a message when no location URI is defined.
        """
        location = getattr(self, 'locationURI', None)
        if location is None:
            print("LocationURI not defined!")
            return False
        if not getattr(self, '_locationLoaded', False):
            self.locationGraph.parse(location)
            self._locationLoaded = True
        return True

    def __checkDisambiguates(self):
        """Follow the first disambiguation link that names a music act.

        Scans the ``wikiPageDisambiguates`` objects of the artist resource
        and switches to the first one whose URI contains one of
        ``_DISAMBIGUATION_MARKERS``.
        """
        candidates = self.artistGraph.objects(
            self.artistURI, self.DBONTO.wikiPageDisambiguates)
        for candidate in candidates:
            disamb = str(candidate)
            if any(marker in disamb for marker in self._DISAMBIGUATION_MARKERS):
                self.__updateGraph(candidate)
                print("Disambiguated to : %s" % disamb)
                # Stop immediately: the graph was just modified by the parse.
                break

    def __updateGraph(self, newURI):
        """Point the scanner at ``newURI`` and parse it into artistGraph."""
        self.artistURI = newURI
        self.artistGraph.parse(newURI)