diff --git a/importers/jchat_importer.py b/importers/jchat_importer.py index c08d7952c..1ca0272a1 100644 --- a/importers/jchat_importer.py +++ b/importers/jchat_importer.py @@ -122,24 +122,29 @@ def _read_message_div(self, div, data_store, datafile, change_id): # Older format uses id= # Newer format uses msgid= msg_id = div.attrib["msgid"] # Grabbing ID to help with error reporting - version = JCHAT_MODERN except KeyError: try: msg_id = div.attrib["id"] - version = JCHAT_LEGACY except KeyError: # Ignore any non-comment messages (e.g. connect/disconnect) return - time_element = div.find("{*}tt/font") - # Sample data included some "Marker" messages with the id="marker" if str.upper(msg_id) == "MARKER": return # Ignore these messages - if time_element is None: + text_blocks = [] + text_blocks.append([item for item in div.findall(".//*") if item.text]) + if text_blocks[0]: + time_element = text_blocks[0][0] + platform_element = text_blocks[0][1] + msg_content_element = text_blocks[0][2:] + + if not text_blocks[0] or len(text_blocks[0]) < 3: self.errors.append( - {self.error_type: f"Unable to read message {msg_id}. No timestamp provided"} + { + self.error_type: f"Unable to read message {msg_id}. Not enough parts (expecting timestamp, platform, message)" + } ) return @@ -147,29 +152,13 @@ def _read_message_div(self, div, data_store, datafile, change_id): timestamp = self.parse_timestamp(time_string, msg_id) time_element.record(self.name, "timestamp", timestamp) - if version == JCHAT_LEGACY: - platform_element = div.find("{*}b/a/font") - else: # version == JCHAT_MODERN - platform_element = div.find("{*}b/font/a") - - if platform_element is None: - self.errors.append( - {self.error_type: f"Unable to read message {msg_id}. No platform provided"} - ) - return platform_quad = platform_element.text[0:4] platform_element.record(self.name, "platform", platform_quad) # Match on quadgraphs platform = self.get_cached_platform_from_quad(data_store, platform_quad, change_id) - msg_content_element = [element for element in div.iterfind("font")] - - if not msg_content_element: - self.errors.append( - {self.error_type: f"Unable to read message {msg_id}. No message provided"} - ) - return msg_content = self.parse_message_content(msg_content_element) + if not msg_content: self.errors.append({self.error_type: f"Unable to parse JChat message {msg_id}."}) return diff --git a/tests/sample_data/jchat_files/combined_format.html b/tests/sample_data/jchat_files/combined_format.html index 505db8e57..dd1e2e1df 100644 --- a/tests/sample_data/jchat_files/combined_format.html +++ b/tests/sample_data/jchat_files/combined_format.html @@ -22,5 +22,11 @@
[08010809A]ABCD_CMDLegacy - font a swap - has i tag - no breaks
+
+ [23112654A]SPLA_ABModern 2 -
no i tag
but has multiple
breaks
+
+
+ [06010709A]SPLB_XOModern 2 - no i tag - no breaks +
\ No newline at end of file diff --git a/tests/test_load_jchat.py b/tests/test_load_jchat.py index 22690902e..97832efe7 100644 --- a/tests/test_load_jchat.py +++ b/tests/test_load_jchat.py @@ -495,11 +495,11 @@ def test_combined_format(self): with self.store.session_scope(): # there must be states after the import comments = self.store.session.query(self.store.db_classes.Comment).all() - assert len(comments) == 5 + assert len(comments) == 7 # there must be platforms after the import platforms = self.store.session.query(self.store.db_classes.Platform).all() - assert len(platforms) == 3 + assert len(platforms) == 5 # there must be one datafile afterwards datafiles = self.store.session.query(self.store.db_classes.Datafile).all() @@ -510,12 +510,14 @@ def test_combined_format(self): .order_by(self.store.db_classes.Comment.time) .all() ) - assert len(results) == 5 + assert len(results) == 7 assert results[0].content == "Modern - has i tag" assert results[1].content == "Modern - no i tag but has multiple breaks" assert results[2].content == "Modern - no i tag - no breaks" assert results[3].content == "Legacy - font a swap - no i tag - no breaks" assert results[4].content == "Legacy - font a swap - has i tag - no breaks" + assert results[5].content == "Modern 2 - no i tag but has multiple breaks" + assert results[6].content == "Modern 2 - no i tag - no breaks" def test_invalid_missing_timestamp(self): html_string = """ @@ -537,7 +539,7 @@ def test_invalid_missing_timestamp(self): check_errors_for_file_contents( html_string, - "Unable to read message 34544=34534. No timestamp provided", + "Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)", importer, "no_timestamp.html", ) @@ -588,7 +590,7 @@ def test_invalid_missing_platform(self): check_errors_for_file_contents( html_string, - "Unable to read message 34544=34534. No platform provided", + "Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)", importer, "no_platform.html", ) @@ -613,7 +615,7 @@ def test_invalid_missing_message(self): check_errors_for_file_contents( html_string, - "Unable to read message 34544=34534. No message provided", + "Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)", importer, "no_message", ) @@ -638,7 +640,10 @@ def test_empty_message(self): importer = JChatImporter() check_errors_for_file_contents( - html_string, "Unable to parse JChat message 34544=34534.", importer, "no_message" + html_string, + "Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)", + importer, + "no_message", ) @staticmethod