-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.py
192 lines (160 loc) · 6.99 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/usr/bin/python
#coding: utf8
from sys import argv
from constants import Constants
import sys
import json
import codecs
from string import digits, ascii_uppercase, ascii_lowercase
from itertools import product
class TokenizerState:
    """Integer tags for the states of the Tokenizer state machine.

    The values are consecutive integers starting at 0, in the order listed.
    """

    # No character of the current token has been consumed yet.
    START = 0
    # One separator character has been consumed; the token is complete.
    SINGLE_CHARACTER = 1
    # Inside a run of ordinary (non-separator) characters.
    WORD = 2
    # The opening curly brace has just been consumed.
    CURLY1 = 3
    # Inside a curly-brace group, waiting for the closing brace.
    CURLY2 = 4
    # The closing curly brace has been consumed; the token is complete.
    CURLY3 = 5
class Tokenizer:
    """Split text into separator, word and curly-brace tokens.

    The tokens are contiguous slices of the input, so joining them
    reproduces the original text.
    """

    # Each of these characters always forms a token of its own.
    SINGLE_CHARACTERS = u' :;/.",()‘—\n\r\t'
    CURLY_CHARACTER_START = u'{'
    CURLY_CHARACTER_END = u'}'

    def scan(self, text):
        """Return the list of tokens found in `text` ([] for empty input)."""
        if not text:
            return []
        pieces = []
        pieceStart = 0
        position = 0
        textLength = len(text)
        current = TokenizerState.START
        while position < textLength:
            following = self.getStateTransition(current, text[position:position + 1])
            if following is not None:
                # The character belongs to the current token; consume it.
                current = following
                position += 1
                continue
            # The current token ended just before this character. Emit it and
            # re-examine the same character from the START state.
            pieces.append(text[pieceStart:position])
            pieceStart = position
            current = TokenizerState.START
        # Whatever is left after the last boundary is the final token.
        pieces.append(text[pieceStart:position])
        return pieces

    def getStateTransition(self, state, character):
        """Return the state reached by consuming `character` in `state`.

        Returns None when `character` cannot extend the current token, i.e.
        the token ends before it. For an unknown `state`, writes '???' to
        standard output and exits the process with status 2.
        """
        if state == TokenizerState.START:
            if character in Tokenizer.SINGLE_CHARACTERS:
                return TokenizerState.SINGLE_CHARACTER
            if character in Tokenizer.CURLY_CHARACTER_START:
                return TokenizerState.CURLY1
            return TokenizerState.WORD

        if state == TokenizerState.SINGLE_CHARACTER:
            # A separator token is exactly one character long.
            return None

        if state == TokenizerState.WORD:
            if (character in Tokenizer.SINGLE_CHARACTERS
                    or character in Tokenizer.CURLY_CHARACTER_START):
                return None
            return TokenizerState.WORD

        if state == TokenizerState.CURLY1:
            # The character right after the opening brace is always taken as
            # content, whatever it is.
            return TokenizerState.CURLY2

        if state == TokenizerState.CURLY2:
            if character in Tokenizer.CURLY_CHARACTER_START:
                return None
            if character in Tokenizer.CURLY_CHARACTER_END:
                return TokenizerState.CURLY3
            return TokenizerState.CURLY2

        if state == TokenizerState.CURLY3:
            # The closing brace has been consumed; the group is complete.
            return None

        sys.stdout.write('???' + '\n')
        sys.exit(2)
def generateDictionaryEncodings(maxCharacters=None):
    """Return every alphanumeric code of length 1 up to `maxCharacters`.

    Codes are built from digits, then uppercase, then lowercase ASCII
    letters. All codes of one length come before any longer code, and codes
    of the same length follow the order produced by itertools.product.

    Args:
        maxCharacters: Longest code length to generate. When None (the
            default), Constants.MAX_ENCODING_CHARACTERS is used.

    Returns:
        A list of strings. It is empty when `maxCharacters` is below 1.
    """
    if maxCharacters is None:
        maxCharacters = Constants.MAX_ENCODING_CHARACTERS
    alphabet = digits + ascii_uppercase + ascii_lowercase
    encodings = []
    for length in range(1, maxCharacters + 1):
        encodings.extend(
            ''.join(combination) for combination in product(alphabet, repeat=length)
        )
    return encodings
def readCardsFromFile(fileName):
    """Decode the UTF-8 file `fileName` and return its parsed JSON content.

    The return value is whatever json.loads produces for the document.
    """
    with codecs.open(fileName, encoding='utf-8') as source:
        return json.loads(source.read())
def replaceCardNameWithTHIS(cards):
    """Replace each card's own name inside its rules text with '$THIS'.

    The card dictionaries are modified in place.

    Args:
        cards: Mapping whose values are card dictionaries. Only cards that
            have a 'text' entry and a non-empty 'name' are touched.
    """
    for card in cards.values():
        name = card.get('name')
        # Skip cards with a missing or empty name: there is nothing to
        # substitute, and str.replace('', ...) would insert the marker
        # between every character of the text.
        if name and 'text' in card:
            card['text'] = card['text'].replace(name, '$THIS')
def createEncodingAndDecodingDictionaries(cards):
    """Assign a code to every unique token found in `cards`.

    Tokens are processed in sorted order. A one-character token whose code
    point lies in 32..47 is its own code, a lone newline (code point 10) is
    coded as '~', and every other token takes the next code popped from the
    end of the module-level `dictionaryEncodings` list, which is consumed.

    Returns:
        A tuple (encodingDictionary, decodingDictionary) mapping
        token -> code and code -> token respectively.
    """
    encodingDictionary = {}
    decodingDictionary = {}
    for token in sorted(getAllUniqueTokens(cards)):
        # Only single-character tokens are candidates for the special cases.
        codePoint = ord(token) if len(token) == 1 else None
        if codePoint is not None and 32 <= codePoint <= 47:
            code = token
        elif codePoint == 10:
            code = '~'
        else:
            code = dictionaryEncodings.pop()
        decodingDictionary[code] = token
        encodingDictionary[token] = code
    return (encodingDictionary, decodingDictionary)
def getAllUniqueTokens(cards):
    """Return the set of distinct tokens across every card in `cards`."""
    seenTokens = set()
    for card in cards.values():
        seenTokens.update(getTokensForCard(card))
    return seenTokens
def getTokensForCard(card):
    """Return every token needed to encode `card`.

    The list holds the tokens of the 'name', 'manaCost', 'type', 'text',
    'power' and 'toughness' fields, in that order; missing fields
    contribute nothing. If the card has both 'power' and 'toughness', a
    '/' token is appended at the end.
    """
    tokens = []
    for fieldName in ('name', 'manaCost', 'type', 'text', 'power', 'toughness'):
        tokens.extend(getTokensForCardField(card, fieldName))
    # getFormattedCard joins power and toughness with encodingDictionary['/'],
    # so '/' must be a known token even if no card field contains one.
    if 'power' in card and 'toughness' in card:
        tokens.append('/')
    return tokens
def getTokensForCardField(card, fieldName):
    """Return the tokens of `card[fieldName]`, or [] if the field is absent."""
    if fieldName not in card:
        return []
    return Tokenizer().scan(card[fieldName])
def writeDictionaryAsJsonToFile(dictionary, fileName):
    """Write `dictionary` to `fileName` as JSON.

    The file is opened for writing with UTF-8 encoding; keys are written in
    sorted order with a four-space indent.
    """
    with codecs.open(fileName, mode='w', encoding='utf-8') as target:
        json.dump(dictionary, target, indent=4, sort_keys=True)
def getFormattedCard(card, encodingDictionary):
    """Return the encoded, single-string form of `card`.

    Each present field among 'name', 'manaCost', 'type' and 'text' becomes
    one encoded section, in that order. If the card has both 'power' and
    'toughness', a final section holds the encoded power, the code for '/'
    and the encoded toughness. Sections are joined with
    Constants.SECTION_SEPARATOR.

    Raises:
        KeyError: if a token (or '/', when it is needed) has no entry in
            `encodingDictionary`.
    """
    sections = []
    for fieldName in ('name', 'manaCost', 'type', 'text'):
        if fieldName in card:
            fieldTokens = getTokensForCardField(card, fieldName)
            sections.append(getEncodedTokenString(fieldTokens, encodingDictionary))

    if 'power' in card and 'toughness' in card:
        powerTokens = getTokensForCardField(card, 'power')
        toughnessTokens = getTokensForCardField(card, 'toughness')
        encodedPower = getEncodedTokenString(powerTokens, encodingDictionary)
        slashCode = encodingDictionary['/']
        encodedToughness = getEncodedTokenString(toughnessTokens, encodingDictionary)
        sections.append(encodedPower + slashCode + encodedToughness)

    return Constants.SECTION_SEPARATOR.join(sections)
def getEncodedTokenString(tokens, encodingDictionary):
    """Return the concatenated codes of `tokens`.

    Raises:
        KeyError: if a token has no entry in `encodingDictionary`.
    """
    return ''.join([encodingDictionary[token] for token in tokens])
######################################################################
# Command-line entry: convert.py <cards JSON> <encoding dict out> <decoding dict out>
if len(argv) != 4:
    sys.stderr.write('Usage: ' + argv[0] + ' fileName, encodingDictionaryFileName, decodingDictionaryFileName' + '\n')
    # sys.exit rather than the bare exit(): the latter is a helper installed
    # by the site module and is not guaranteed to exist.
    sys.exit(1)
script, fileName, encodingDictionaryFileName, decodingDictionaryFileName = argv

# This must stay a module-level name: createEncodingAndDecodingDictionaries
# pops codes from it. It is reversed so that pop() hands out the earliest
# generated (shortest) codes first.
dictionaryEncodings = generateDictionaryEncodings()
dictionaryEncodings.reverse()

cards = readCardsFromFile(fileName)
replaceCardNameWithTHIS(cards)
encodingDictionary, decodingDictionary = createEncodingAndDecodingDictionaries(cards)
writeDictionaryAsJsonToFile(encodingDictionary, encodingDictionaryFileName)
writeDictionaryAsJsonToFile(decodingDictionary, decodingDictionaryFileName)

# Write one UTF-8 encoded card per line. Where sys.stdout has a binary
# `buffer` (Python 3) the bytes go there; otherwise (Python 2) sys.stdout
# accepts them directly.
byteOutput = getattr(sys.stdout, 'buffer', sys.stdout)
for card in cards.values():
    byteOutput.write(getFormattedCard(card, encodingDictionary).encode('utf-8') + b'\n')