-
Notifications
You must be signed in to change notification settings - Fork 1
/
parseFrameFile.py
57 lines (44 loc) · 1.92 KB
/
parseFrameFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
import sqlite3 as db
import sys
import os
def parseFrameFiles(c, framePath, tagPrefix = "{http://framenet.icsi.berkeley.edu}"):
    """Parse every ``.xml`` frame file in a directory and insert its rows.

    For each file, the root element contributes one FRAME row, every direct
    ``FE`` child contributes one FE row, and every direct ``lexUnit`` child
    contributes one LU row.  All rows are collected first and then written
    with three ``executemany`` calls, so nothing is inserted if parsing fails.

    Args:
        c: Object providing ``executemany(sql, rows)`` (e.g. a sqlite3
            cursor).  The tables FRAME (2 columns), FE (5 columns) and
            LU (5 columns) must already exist; this function does not commit.
        framePath: Directory containing the frame XML files.  A trailing
            path separator is optional.
        tagPrefix: Namespace prefix, in ElementTree ``{uri}`` form, that is
            prepended to the ``FE``, ``lexUnit`` and ``sentenceCount`` tags.

    Row layouts:
        FRAME: (frameID as int, frame name)
        FE:    (FE ID, FE name, frameID, coreType, abbrev)
        LU:    (LU ID, LU name, frameID, POS, annotated count as int or None)
    """
    feTag = tagPrefix + "FE"
    luTag = tagPrefix + "lexUnit"
    sentenceCountTag = tagPrefix + "sentenceCount"

    FrameTableData = []
    FETableData = []
    LUTableData = []

    # os.listdir() order is arbitrary; sort so rows are inserted reproducibly.
    for fileName in sorted(os.listdir(framePath)):
        if not fileName.endswith('.xml'):
            continue

        # os.path.join works whether or not framePath ends with a separator.
        tree = ET.ElementTree(file=os.path.join(framePath, fileName))
        root = tree.getroot()
        frameID = int(root.attrib.get('ID'))
        FrameTableData.append((frameID, root.attrib.get('name')))

        for child in root:
            attrs = child.attrib
            if child.tag == feTag:
                FETableData.append((
                    attrs.get('ID'),
                    attrs.get('name'),
                    frameID,
                    attrs.get('coreType'),
                    attrs.get('abbrev'),
                ))
            elif child.tag == luTag:
                # Reset per lexical unit so a unit without a sentenceCount
                # child does not inherit the previous unit's count.
                luAnnotated = None
                for subchild in child:
                    if subchild.tag == sentenceCountTag:
                        luAnnotated = int(subchild.attrib.get('annotated'))
                LUTableData.append((
                    attrs.get('ID'),
                    attrs.get('name'),
                    frameID,
                    attrs.get('POS'),
                    luAnnotated,
                ))

    # Single-argument print() produces the same text on Python 2 and 3.
    print("Frame Table # rows:  %d" % len(FrameTableData))
    print("FE Table # rows:  %d" % len(FETableData))
    print("LU Table # rows:  %d" % len(LUTableData))

    c.executemany('Insert into FRAME Values(?, ?)', FrameTableData)
    c.executemany('Insert into FE Values(?, ?, ?, ?, ?)', FETableData)
    c.executemany('Insert into LU Values(?, ?, ?, ?, ?)', LUTableData)