-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMiningCSV.py
103 lines (82 loc) · 2.54 KB
/
MiningCSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 19 15:25:21 2018
@author: mayur
"""
import urllib.parse

import pandas as pd

# NOTE(review): `requests`, `stopwords` (presumably nltk.corpus), and
# `SPARQLWrapper` / `JSON` are used below but are not imported anywhere in
# the visible file -- confirm how they are brought into scope.
# Annotate every article in the CSV with DBpedia Spotlight, then ask the
# DBpedia SPARQL endpoint for the type labels and subject names of each
# annotated resource, printing the collected lists per article.
#
# NOTE(review): the loop nesting below was reconstructed from statement
# order -- confirm it against the intended layout.

# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences.
df = pd.read_csv(r'E:\mayur\dbpedia\Querying-dbpedia-for-named-entity-recognition\Data\Topic Mining Trial.csv')
BASE_URL = 'http://api.dbpedia-spotlight.org/en/annotate?text={text}&confidence={confidence}&support={support}'
CONFIDENCE = '0.5'
SUPPORT = '50'
HEADERS = {'Accept': 'application/json'}

# The query is split around the VALUES clause so the resource URI can be
# spliced in by plain concatenation (the text contains literal braces, which
# rules out str.format).
# NOTE(review): ?rank is selected but no pattern in the WHERE clause binds it.
QUERY_HEAD = """PREFIX vrank:<http://purl.org/voc/vrank#>
SELECT DISTINCT ?l ?rank ?sname
FROM <http://dbpedia.org>
FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
WHERE {
VALUES (?s) {"""
QUERY_TAIL = """ }
?s rdf:type ?p .
?p rdfs:label ?l.
?s dct:subject ?sub .
?sub rdfs:label ?sname.
FILTER (lang(?l) = 'en')
} limit 6
"""

# Build the stop-word lookup once; the original re-evaluated
# stopwords.words('english') for every single word of every article.
STOP_WORDS = frozenset(stopwords.words('english'))

# The endpoint wrapper and its return format do not depend on the article,
# so they are configured once and reused for every query.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setReturnFormat(JSON)

#df['flag']
for article in df['Article']:
    # Drop stop words (case-sensitive match) before sending the text.
    TEXT = ' '.join(word for word in article.split() if word not in STOP_WORDS)
    REQUEST = BASE_URL.format(
        text=urllib.parse.quote_plus(TEXT),
        confidence=CONFIDENCE,
        support=SUPPORT,
    )
    r = requests.get(url=REQUEST, headers=HEADERS)
    print(1)  # progress marker: annotation request returned
    # Fail loudly on an HTTP error instead of mis-parsing an error body.
    r.raise_for_status()
    response = r.json()
    # 'Resources' may be absent when nothing was annotated; treat that as
    # "no resources" rather than raising KeyError.
    resources = response.get('Resources', [])
    print(2)  # progress marker: response decoded
    all_urls = [res['@URI'] for res in resources]

    # x[k] holds the type labels and y[k] the subject names returned for
    # all_urls[k].
    x = []
    y = []
    for uri in all_urls:
        sparql.setQuery(QUERY_HEAD + '(<{0}>)'.format(uri) + QUERY_TAIL)
        bindings = sparql.query().convert()["results"]["bindings"]
        x.append([binding['l']['value'] for binding in bindings])
        y.append([binding['sname']['value'] for binding in bindings])
    print(x, y)

# Disabled draft, kept verbatim as a bare string literal so it never runs.
# It pairs each resource's '@surfaceForm' with the labels collected in x and
# was apparently meant to populate df['Tags'] -- TODO confirm before reviving.
"""
item = list()
for res in resources:
item.append(res['@surfaceForm'])
mainlist = {}
j = 0
for i in item:
mainlist[i] = x[j]
j = j +1
x = list()
for i in mainlist:
if mainlist[i][:]:
print(i,':', mainlist[i][:])
print('\n')
x.append(i)
y.append([x])
#x = ' '.join(x)
#df['Tags'] = x
df['Tags']= y
"""