-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSubParsers.py
executable file
·67 lines (59 loc) · 2.15 KB
/
SubParsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def parsesrt(path):
    """Parse a SubRip (.srt) subtitle file into a list of cue dictionaries.

    Args:
        path: Path of the subtitle file. It is read in text mode with the
            platform default encoding.

    Returns:
        A list with one dict per cue, in file order. Each dict has the
        string-valued keys "line" (the cue index), "start" and "end"
        (timestamps exactly as written, e.g. "00:00:01,000") and "text"
        (the cue body with surrounding whitespace stripped). A document
        without any recognisable cue yields an empty list.
    """
    import re

    # Use a context manager so the file handle is always released.
    with open(path, 'r') as handle:
        document = handle.read()

    # The lookahead that ends a cue's text accepts the same header shape as
    # the header pattern itself (index, optional whitespace, newline, timing
    # line). Previously it demanded a newline directly after the index, so a
    # cue whose index line had trailing spaces was absorbed into the text of
    # the cue before it.
    cue_pattern = re.compile(r"""
        (\d+)\s*?\n                                     # cue index
        (\d+:\d+:\d+,\d+)\s*?-->\s*?(\d+:\d+:\d+,\d+)   # start --> end
        ([\s\S]+?)                                      # subtitle text
        (?=\n\d+\s*?\n\d+:\d+:\d+,\d+\s*?-->|\Z)        # stop at next cue or end of document
        """, re.VERBOSE)

    return [
        {"line": index, "start": start, "end": end, "text": text.strip()}
        for index, start, end, text in cue_pattern.findall(document)
    ]
def parseass(path):
    """Parse the dialogue events of an ASS/SSA subtitle file.

    The field names come from the "Format:" line that follows the
    "[Events]" section header; they are lower-cased and stripped, and used
    as the keys of every returned dict.

    Args:
        path: Path of the subtitle file. It is read in text mode with the
            platform default encoding.

    Returns:
        A list with one dict per "Dialogue:" line, in file order, mapping
        each format field name to the corresponding stripped string value.
        A dialogue line with fewer values than the format declares yields a
        dict with only the leading keys.

    Raises:
        IndexError: If the document has no "[Events]" header followed by a
            "Format:" line.
    """
    import re

    # Use a context manager so the file handle is always released.
    with open(path, 'r') as handle:
        document = handle.read()

    format_line = re.findall(
        r"\[Events\]\s*?\nFormat:\s*([\s\S]+?)\n", document
    )[0]
    header = [field.strip() for field in format_line.lower().split(',')]

    # Split each event into at most len(header) values so that commas inside
    # the final field (the subtitle text in a standard Format line) stay part
    # of that field instead of being cut off.
    max_splits = len(header) - 1

    arr = []
    # One event per line: anything after the end of a "Dialogue:" line
    # (comment events, later sections) must not leak into its values.
    for payload in re.findall(r"^Dialogue:(.*)$", document, re.MULTILINE):
        tokens = [token.strip() for token in payload.split(',', max_splits)]
        arr.append(dict(zip(header, tokens)))
    return arr
def parsessa(path):
    """Parse an SSA subtitle file by delegating to the ASS parser.

    Args:
        path: Path of the subtitle file.

    Returns:
        Whatever ``parseass`` returns for the same path.
    """
    events = parseass(path)
    return events
def parsevtt(path):
    """Parse a WebVTT (.vtt) subtitle file into a list of cue dictionaries.

    Only cues introduced by a numeric identifier line followed by an
    "hh:mm:ss.fff --> hh:mm:ss.fff" timing line are recognised. The optional
    position/align/size/line settings on the timing line are matched but not
    included in the result.

    Args:
        path: Path of the subtitle file. It is read in text mode with the
            platform default encoding.

    Returns:
        A list with one dict per recognised cue, in file order, with the
        string-valued keys "line" (cue number), "start", "end" (timestamps
        as written) and "text" (cue body with surrounding whitespace
        stripped). A document without any recognisable cue yields an empty
        list.
    """
    import re

    # Use a context manager so the file handle is always released.
    with open(path, 'r') as handle:
        document = handle.read()

    # The fractional separator is matched explicitly. It used to be a bare
    # "." in the pattern, which matches any character and so accepted
    # malformed timestamps. The comma is still allowed because the old
    # pattern accepted it too.
    cue_pattern = re.compile(r"""
        (\d+)\s*?\n                                               # cue number
        (\d+:\d+:\d+[.,]\d+)\s*?-->\s*?(\d+:\d+:\d+[.,]\d+)\s*    # start --> end
        # optional cue settings
        (?:position:([^\n]+?))?
        (?:align:([^\n]+?))?
        (?:size:([^\n]+?))?
        (?:line:([^\n]+))?
        \n
        ([\s\S]+?)                                                # subtitle text
        (?=\s*?\d+\s*?\n\s*\d+:\d+:\d+[.,]\d+\s*?-->|\Z)          # stop at next cue or end of document
        """, re.VERBOSE)

    return [
        # The text is the last capture group; the settings groups sit between
        # the end timestamp and the text and are not returned.
        {"line": cue[0], "start": cue[1], "end": cue[2], "text": cue[-1].strip()}
        for cue in cue_pattern.findall(document)
    ]
def parsesubs(path):
    """Parse a subtitle file with the parser matching its file extension.

    The extension is whatever follows the last "." in ``path``, compared
    case-insensitively and with surrounding whitespace ignored.

    Args:
        path: Path of the subtitle file.

    Returns:
        The selected parser's result for 'ass', 'ssa', 'vtt' and 'srt'
        extensions, or None when the extension is not supported.
    """
    parsers = {
        'ass': parseass,
        'ssa': parsessa,
        'vtt': parsevtt,
        'srt': parsesrt,
    }
    suffix = path.rsplit(".", 1)[-1].lower().strip()
    if suffix not in parsers:
        return None  # unsupported filetype
    handler = parsers[suffix]
    return handler(path)