Added aliases for column headers.

XLSForm · Aug 24, 2011 · de45095 · de45095
1 parent dab9c6d
commit de45095
Showing 1 changed file with 146 additions and 25 deletions.
diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
@@ -9,6 +9,7 @@
 import codecs
 import os
 
+
 def print_pyobj_to_json(pyobj, path):
     fp = codecs.open(path, mode="w", encoding="utf-8")
     json.dump(pyobj, fp=fp, ensure_ascii=False, indent=4)
@@ -24,17 +25,73 @@ def print_pyobj_to_json(pyobj, path):
 DICT_CHAR = u":"
 
 TYPE = u"type"
+NAME = u"name"
 CHOICES = u"choices"
 COLUMNS = u"columns"
 
+DISABLED = u'disabled'
+
+# Special reserved values for type column that allow the user to set
+# the form's title or id.
+SET_TITLE = u"set form title"
+SET_ID = u"set form id"
+SET_DEFAULT_LANG = u"set default language"
+
+#Conversion dictionary from user friendly column names to meaningful values
+col_name_conversions = {
+    "caption": u"label",
+    "appearance": u"control:appearance",
+    "relevance": u"bind:relevant",
+    "required": u"bind:required",
+    "read only": u"bind:readonly",
+    "constraint": u"bind:constraint",
+    "constraing message": u"bind:jr:constraintMsg",
+    "calculation": u"bind:calculate",
+    "command": u"type",
+    "tag": u"name",
+    "label": u"caption",
+    "relevant": u"bind:relevant",
+    "skippable": u"required",
+    "value": u"name",
+    "image": u"media:image",
+    "audio": u"media:audio",
+    "video": u"media:video",
+    "count": u"bind:jr:count"
+}
+
+group_name_conversions = {
+    "looped group": u"repeat"
+}
+
+yes_no_conversions = {
+        "yes": "true()",
+        "Yes": "true()",
+        "YES": "true()",
+        "true": "true()",
+        "True": "true()",
+        "TRUE": "true()",
+        "no": "false()",
+        "No": "false()",
+        "NO": "false()",
+        "false": "false()",
+        "False": "false()",
+        "FALSE": "false()"
+    }
+
+
 class ExcelReader(object):
+
     def __init__(self, path):
         assert isinstance(path, basestring), "path must be a string"
         (filepath, filename) = os.path.split(path)
         (shortname, extension) = os.path.splitext(filename)
-        assert extension==".xls", "path must end with .xls"
+        assert extension == ".xls", "path must end with .xls"
         self._path = path
         self._name = unicode(shortname)
+        self._print_name = unicode(shortname)
+        self._title = unicode(shortname)
+        self._id = unicode(shortname)
+        self._def_lang = unicode("English")
         self._parse_xls()
 
     def _parse_xls(self):
@@ -57,13 +114,24 @@ def _create_dict_from_xls(self):
         self._dict = {}
         for sheet in workbook.sheets():
             self._dict[sheet.name] = []
-            for row in range(1,sheet.nrows):
+            for row in range(1, sheet.nrows):
                 row_dict = {}
-                for column in range(0,sheet.ncols):
-                    key = sheet.cell(0,column).value
-                    value = sheet.cell(row,column).value
-                    if value is not None and value!="": row_dict[key] = value
-                if row_dict: self._dict[sheet.name].append(row_dict)
+                for column in range(0, sheet.ncols):
+                    key = sheet.cell(0, column).value
+
+                    # Convert key from ui friendly to meaningful
+                    if key in col_name_conversions:
+                        key = col_name_conversions[key]
+                    # Special case for converting captions because
+                    # they have languages
+                    key = key.replace("caption", "label")
+
+                    value = sheet.cell(row, column).value
+                    if value is not None and value != "":
+                        row_dict[key] = value
+
+                if row_dict:
+                    self._dict[sheet.name].append(row_dict)
 
     def _set_choices_and_columns_sheet_name(self):
         sheet_names = self._dict.keys()
@@ -77,7 +145,7 @@ def _strip_unicode_values(self):
         for sheet_name, dicts in self._dict.items():
             for d in dicts:
                 for k, v in d.items():
-                    if type(v)==unicode:
+                    if type(v) == unicode:
                         d[k] = v.strip()
 
     def _fix_int_values(self):
@@ -88,22 +156,22 @@ def _fix_int_values(self):
         for sheet_name, dicts in self._dict.items():
             for d in dicts:
                 for k, v in d.items():
-                    if type(v)==float and v==int(v):
+                    if type(v) == float and v == int(v):
                         d[k] = int(v)
 
     def _group_dictionaries(self):
         """
         For each row in the worksheet, group all keys that contain a
-        colon. So {"text:english" : "hello", "text:french" :
-        "bonjour"} becomes {"text" : {"english" : "hello", "french" :
+        colon. So {"text:english": "hello", "text:french" :
+        "bonjour"} becomes {"text": {"english": "hello", "french" :
         "bonjour"}.
         """
         for sheet_name, dicts in self._dict.items():
             for d in dicts:
                 groups = {}
                 for k, v in d.items():
                     l = k.split(DICT_CHAR)
-                    if len(l)>=2:
+                    if len(l) >= 2:
                         if l[0] not in groups:
                             groups[l[0]] = {}
                         groups[l[0]][DICT_CHAR.join(l[1:])] = v
@@ -116,7 +184,8 @@ def to_dict(self):
         return self._dict
 
     def print_json_to_file(self, filename=""):
-        if not filename: filename = self._path[:-4] + ".json"
+        if not filename:
+            filename = self._path[:-4] + ".json"
         print_pyobj_to_json(self.to_dict(), filename)
 
 
@@ -145,22 +214,61 @@ def _process_question_type(self):
 
         let's make it a requirement that list-names have no spaces
         """
+        to_remove = []
         for q in self._dict[SURVEY_SHEET]:
             if TYPE not in q:
-                raise Exception(
-                    "The following question did not specify a question type:\n" + \
-                    json.dumps(q, indent=4)
-                    )
+                #Did not specify question type: Ignore the question
+                to_remove.append(q)
+                continue
+
+            if q[TYPE] == SET_TITLE:
+                if not q[NAME].strip().find(" ") == -1:
+                    raise Exception("Form title must not include any spaces", q[NAME])
+                self._title = q[NAME]
+                to_remove.append(q)
+                continue
+
+            if q[TYPE] == SET_ID:
+                if not q[NAME].strip().find(" ") == -1:
+                    raise Exception("Form id must not include any spaces", q[NAME])
+                self._id = q[NAME]
+                to_remove.append(q)
+                continue
+
+            if q[TYPE] == SET_DEFAULT_LANG:
+                self._def_lang = q[NAME]
+                to_remove.append(q)
+                continue
+
+            if DISABLED in q:
+                disabled = q["disabled"]
+                if disabled in yes_no_conversions:
+                    disabled = yes_no_conversions[disabled]
+                if disabled == 'true()':
+                    to_remove.append(q)
+                continue
+
             question_type = q[TYPE]
             question_type.strip()
             re.sub(r"\s+", " ", question_type)
-            if question_type.startswith(u"select"):
+
+            if u"select" in question_type:
                 self._prepare_multiple_choice_question(q, question_type)
             if question_type.startswith(u"begin loop"):
                 self._prepare_begin_loop(q, question_type)
 
+        if not self._id.find(" ") == -1:
+            raise Exception("Form id must not include any spaces", self._id)
+
+        if not self._name.find(" ") == -1:
+            self._name = self._id
+
+        for q in to_remove:
+            self._dict[SURVEY_SHEET].remove(q)
+
     def _prepare_multiple_choice_question(self, q, question_type):
-        m = re.search(r"^(?P<select_command>select one|select all that apply) from (?P<list_name>\S+)( (?P<specify_other>or specify other))?$", question_type)
+        regexp = r"^(?P<select_command>select one|select all that apply) from (?P<list_name>\S+)( (?P<specify_other>or specify other))?$"
+        m = re.search(regexp, question_type)
         assert m, "unsupported select syntax:" + question_type
         assert CHOICES not in q
         d = m.groupdict()
@@ -190,8 +298,10 @@ def _construct_choice_lists(self):
                 list_name = choice.pop(LIST_NAME)
             except KeyError:
                 raise Exception("For some reason this choice isn't associated with a list.", choice)
-            if list_name in choices: choices[list_name].append(choice)
-            else: choices[list_name] = [choice]
+            if list_name in choices:
+                choices[list_name].append(choice)
+            else:
+                choices[list_name] = [choice]
         self._dict[self._lists_sheet_name] = choices
 
     def _insert_lists(self):
@@ -213,21 +323,32 @@ def _insert_list(self, q, key, lists_by_name):
 
     def _organize_sections(self):
         # this needs to happen after columns have been inserted
-        result = {u"type" : u"survey", u"name" : self._name, u"children" : []}
+        result = {u"type": u"survey", u"name": self._name, u"children": []}
         stack = [result]
         for cmd in self._dict:
             cmd_type = cmd[u"type"]
             match_begin = re.match(r"begin (?P<type>group|repeat|loop)", cmd_type)
             match_end = re.match(r"end (?P<type>group|repeat|loop)", cmd_type)
+            # Todo: combine the begin and end patterns below with those above.
+            # match_begin = re.match(r"begin (?P<type>lgroup|group|looped group|repeat)", cmd_type)
+            # match_end = re.match(r"end (?P<type>lgroup|group|looped group|repeat)", cmd_type)
             if match_begin:
                 # start a new section
                 cmd[u"type"] = match_begin.group(1)
+
+                if cmd[u"type"] in group_name_conversions:
+                    cmd[u"type"] = group_name_conversions[cmd[u"type"]]
+
                 cmd[u"children"] = []
                 stack[-1][u"children"].append(cmd)
                 stack.append(cmd)
             elif match_end:
+                match_end = match_end.group(1)
+                if match_end in group_name_conversions:
+                    match_end = group_name_conversions[match_end]
+
                 begin_cmd = stack.pop()
-                if begin_cmd[u"type"] != match_end.group(1):
+                if begin_cmd[u"type"] != match_end:
                     raise Exception("This end group does not match the previous begin", cmd)
             else:
                 stack[-1][u"children"].append(cmd)
@@ -262,15 +383,15 @@ def _organize_renames(self):
         for d in self._dict[u"Dictionary"]:
             if u"Variable Name" in d:
                 assert d[u"Variable Name"] not in variable_names_so_far, \
-                    d[u"Variable Name"]                
+                    d[u"Variable Name"]
                 variable_names_so_far.append(d[u"Variable Name"])
                 new_dict[d[u"XPath"]] = d[u"Variable Name"]
             else:
                 variable_names_so_far.append(d[u"XPath"])
         self._dict = new_dict
 
 
-if __name__=="__main__":
+if __name__ == "__main__":
     # Open the excel file that is the second argument to this python
     # call, convert that file to json and save that json to a file
     path = sys.argv[1]