Skip to content

Commit

Permalink
Added aliases for column headers.
Browse files Browse the repository at this point in the history
  • Loading branch information
amarder committed Aug 24, 2011
1 parent dab9c6d commit de45095
Showing 1 changed file with 146 additions and 25 deletions.
171 changes: 146 additions & 25 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import codecs
import os


def print_pyobj_to_json(pyobj, path):
fp = codecs.open(path, mode="w", encoding="utf-8")
json.dump(pyobj, fp=fp, ensure_ascii=False, indent=4)
Expand All @@ -24,17 +25,73 @@ def print_pyobj_to_json(pyobj, path):
DICT_CHAR = u":"

TYPE = u"type"
NAME = u"name"
CHOICES = u"choices"
COLUMNS = u"columns"

DISABLED = u'disabled'

# Special reserved values for type column that allow the user to set
# the form's title or id.
SET_TITLE = u"set form title"
SET_ID = u"set form id"
SET_DEFAULT_LANG = u"set default language"

#Conversion dictionary from user friendly column names to meaningful values
col_name_conversions = {
"caption": u"label",
"appearance": u"control:appearance",
"relevance": u"bind:relevant",
"required": u"bind:required",
"read only": u"bind:readonly",
"constraint": u"bind:constraint",
"constraing message": u"bind:jr:constraintMsg",
"calculation": u"bind:calculate",
"command": u"type",
"tag": u"name",
"label": u"caption",
"relevant": u"bind:relevant",
"skippable": u"required",
"value": u"name",
"image": u"media:image",
"audio": u"media:audio",
"video": u"media:video",
"count": u"bind:jr:count"
}

group_name_conversions = {
"looped group": u"repeat"
}

yes_no_conversions = {
"yes": "true()",
"Yes": "true()",
"YES": "true()",
"true": "true()",
"True": "true()",
"TRUE": "true()",
"no": "false()",
"No": "false()",
"NO": "false()",
"false": "false()",
"False": "false()",
"FALSE": "false()"
}


class ExcelReader(object):

def __init__(self, path):
assert isinstance(path, basestring), "path must be a string"
(filepath, filename) = os.path.split(path)
(shortname, extension) = os.path.splitext(filename)
assert extension==".xls", "path must end with .xls"
assert extension == ".xls", "path must end with .xls"
self._path = path
self._name = unicode(shortname)
self._print_name = unicode(shortname)
self._title = unicode(shortname)
self._id = unicode(shortname)
self._def_lang = unicode("English")
self._parse_xls()

def _parse_xls(self):
Expand All @@ -57,13 +114,24 @@ def _create_dict_from_xls(self):
self._dict = {}
for sheet in workbook.sheets():
self._dict[sheet.name] = []
for row in range(1,sheet.nrows):
for row in range(1, sheet.nrows):
row_dict = {}
for column in range(0,sheet.ncols):
key = sheet.cell(0,column).value
value = sheet.cell(row,column).value
if value is not None and value!="": row_dict[key] = value
if row_dict: self._dict[sheet.name].append(row_dict)
for column in range(0, sheet.ncols):
key = sheet.cell(0, column).value

# Convert key from ui friendly to meaningful
if key in col_name_conversions:
key = col_name_conversions[key]
# Special case for converting captions because
# they have languages
key = key.replace("caption", "label")

value = sheet.cell(row, column).value
if value is not None and value != "":
row_dict[key] = value

if row_dict:
self._dict[sheet.name].append(row_dict)

def _set_choices_and_columns_sheet_name(self):
sheet_names = self._dict.keys()
Expand All @@ -77,7 +145,7 @@ def _strip_unicode_values(self):
for sheet_name, dicts in self._dict.items():
for d in dicts:
for k, v in d.items():
if type(v)==unicode:
if type(v) == unicode:
d[k] = v.strip()

def _fix_int_values(self):
Expand All @@ -88,22 +156,22 @@ def _fix_int_values(self):
for sheet_name, dicts in self._dict.items():
for d in dicts:
for k, v in d.items():
if type(v)==float and v==int(v):
if type(v) == float and v == int(v):
d[k] = int(v)

def _group_dictionaries(self):
"""
For each row in the worksheet, group all keys that contain a
colon. So {"text:english" : "hello", "text:french" :
"bonjour"} becomes {"text" : {"english" : "hello", "french" :
colon. So {"text:english": "hello", "text:french" :
"bonjour"} becomes {"text": {"english": "hello", "french" :
"bonjour"}.
"""
for sheet_name, dicts in self._dict.items():
for d in dicts:
groups = {}
for k, v in d.items():
l = k.split(DICT_CHAR)
if len(l)>=2:
if len(l) >= 2:
if l[0] not in groups:
groups[l[0]] = {}
groups[l[0]][DICT_CHAR.join(l[1:])] = v
Expand All @@ -116,7 +184,8 @@ def to_dict(self):
return self._dict

def print_json_to_file(self, filename=""):
if not filename: filename = self._path[:-4] + ".json"
if not filename:
filename = self._path[:-4] + ".json"
print_pyobj_to_json(self.to_dict(), filename)


Expand Down Expand Up @@ -145,22 +214,61 @@ def _process_question_type(self):
let's make it a requirement that list-names have no spaces
"""
to_remove = []
for q in self._dict[SURVEY_SHEET]:
if TYPE not in q:
raise Exception(
"The following question did not specify a question type:\n" + \
json.dumps(q, indent=4)
)
#Did not specify question type: Ignore the question
to_remove.append(q)
continue

if q[TYPE] == SET_TITLE:
if not q[NAME].strip().find(" ") == -1:
raise Exception("Form title must not include any spaces", q[NAME])
self._title = q[NAME]
to_remove.append(q)
continue

if q[TYPE] == SET_ID:
if not q[NAME].strip().find(" ") == -1:
raise Exception("Form id must not include any spaces", q[NAME])
self._id = q[NAME]
to_remove.append(q)
continue

if q[TYPE] == SET_DEFAULT_LANG:
self._def_lang = q[NAME]
to_remove.append(q)
continue

if DISABLED in q:
disabled = q["disabled"]
if disabled in yes_no_conversions:
disabled = yes_no_conversions[disabled]
if disabled == 'true()':
to_remove.append(q)
continue

question_type = q[TYPE]
question_type.strip()
re.sub(r"\s+", " ", question_type)
if question_type.startswith(u"select"):

if u"select" in question_type:
self._prepare_multiple_choice_question(q, question_type)
if question_type.startswith(u"begin loop"):
self._prepare_begin_loop(q, question_type)

if not self._id.find(" ") == -1:
raise Exception("Form id must not include any spaces", self._id)

if not self._name.find(" ") == -1:
self._name = self._id

for q in to_remove:
self._dict[SURVEY_SHEET].remove(q)

def _prepare_multiple_choice_question(self, q, question_type):
m = re.search(r"^(?P<select_command>select one|select all that apply) from (?P<list_name>\S+)( (?P<specify_other>or specify other))?$", question_type)
regexp = r"^(?P<select_command>select one|select all that apply) from (?P<list_name>\S+)( (?P<specify_other>or specify other))?$"
m = re.search(regexp, question_type)
assert m, "unsupported select syntax:" + question_type
assert CHOICES not in q
d = m.groupdict()
Expand Down Expand Up @@ -190,8 +298,10 @@ def _construct_choice_lists(self):
list_name = choice.pop(LIST_NAME)
except KeyError:
raise Exception("For some reason this choice isn't associated with a list.", choice)
if list_name in choices: choices[list_name].append(choice)
else: choices[list_name] = [choice]
if list_name in choices:
choices[list_name].append(choice)
else:
choices[list_name] = [choice]
self._dict[self._lists_sheet_name] = choices

def _insert_lists(self):
Expand All @@ -213,21 +323,32 @@ def _insert_list(self, q, key, lists_by_name):

def _organize_sections(self):
# this needs to happen after columns have been inserted
result = {u"type" : u"survey", u"name" : self._name, u"children" : []}
result = {u"type": u"survey", u"name": self._name, u"children": []}
stack = [result]
for cmd in self._dict:
cmd_type = cmd[u"type"]
match_begin = re.match(r"begin (?P<type>group|repeat|loop)", cmd_type)
match_end = re.match(r"end (?P<type>group|repeat|loop)", cmd_type)
# Todo: combine the begin and end patterns below with those above.
# match_begin = re.match(r"begin (?P<type>lgroup|group|looped group|repeat)", cmd_type)
# match_end = re.match(r"end (?P<type>lgroup|group|looped group|repeat)", cmd_type)
if match_begin:
# start a new section
cmd[u"type"] = match_begin.group(1)

if cmd[u"type"] in group_name_conversions:
cmd[u"type"] = group_name_conversions[cmd[u"type"]]

cmd[u"children"] = []
stack[-1][u"children"].append(cmd)
stack.append(cmd)
elif match_end:
match_end = match_end.group(1)
if match_end in group_name_conversions:
match_end = group_name_conversions[match_end]

begin_cmd = stack.pop()
if begin_cmd[u"type"] != match_end.group(1):
if begin_cmd[u"type"] != match_end:
raise Exception("This end group does not match the previous begin", cmd)
else:
stack[-1][u"children"].append(cmd)
Expand Down Expand Up @@ -262,15 +383,15 @@ def _organize_renames(self):
for d in self._dict[u"Dictionary"]:
if u"Variable Name" in d:
assert d[u"Variable Name"] not in variable_names_so_far, \
d[u"Variable Name"]
d[u"Variable Name"]
variable_names_so_far.append(d[u"Variable Name"])
new_dict[d[u"XPath"]] = d[u"Variable Name"]
else:
variable_names_so_far.append(d[u"XPath"])
self._dict = new_dict


if __name__=="__main__":
if __name__ == "__main__":
# Open the excel file that is the second argument to this python
# call, convert that file to json and save that json to a file
path = sys.argv[1]
Expand Down

0 comments on commit de45095

Please sign in to comment.