From ef3ff5025bbced8e57710d4259aa7c136aaad69f Mon Sep 17 00:00:00 2001 From: Thomas Miller Date: Sun, 17 Mar 2024 12:19:08 -0500 Subject: [PATCH] Fix bug when missing workbook relationship Add a check to see if the list is empty before trying to access its contents. If an Excel file has an overridden relationship without the word "book" in its name, the code will attempt to grab the first item of an empty list when looking up workbook relationships. IndexError: list index out of range There could be a better fix for this issue; I'm not well enough versed in the xlsx specification. The following xlsx file caused the issue. $ unzip -l some_file.xlsx Archive: some_file.xlsx Length Date Time Name --------- ---------- ----- ---- 142 02-06-2024 13:28 xl/worksheets/_rels/sheet1.xml.rels 65968555 02-06-2024 13:28 xl/worksheets/sheet1.xml 2078037 02-06-2024 13:28 xl/sharedStrings.xml 9867 02-06-2024 13:28 xl/styles.xml 566 02-06-2024 13:28 xl/_rels/workbook.xml.rels 388 02-06-2024 13:28 xl/workbook.xml 297 02-06-2024 13:28 _rels/.rels 1122 02-06-2024 13:28 [Content_Types].xml --------- ------- 68058974 8 files In `[Content_Types].xml` the relationships are overridden to point at `_rels/.rels` rather than `xl/_rels/workbook.xml.rels`. This causes the `workbook_relationships` list to be empty, which causes the error mentioned above. One can see that the file does indeed have a workbook relationship; however, it is being overridden. 
`[Content_Types].xml`: `xl/_rels/workbook.xml.rels`: --- xlsx2csv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xlsx2csv.py b/xlsx2csv.py index bef3cf6..b3ac7a5 100755 --- a/xlsx2csv.py +++ b/xlsx2csv.py @@ -222,8 +222,11 @@ def __init__(self, xlsxfile, **options): self.shared_strings = self._parse(SharedStrings, self.content_types.types["shared_strings"]) self.styles = self._parse(Styles, self.content_types.types["styles"]) self.workbook = self._parse(Workbook, self.content_types.types["workbook"]) - workbook_relationships = list(filter(lambda r: "book" in r, self.content_types.types["relationships"]))[0] - self.workbook.relationships = self._parse(Relationships, workbook_relationships) + workbook_relationships = list(filter(lambda r: "book" in r, self.content_types.types["relationships"])) + if len(workbook_relationships) > 0: + self.workbook.relationships = self._parse(Relationships, workbook_relationships[0]) + else: + self.workbook.relationships = Relationships() if self.options['no_line_breaks']: self.shared_strings.replace_line_breaks() elif self.options['escape_strings']: