From fe87fd2877f62407d390929f9cde8e7dd5706bc2 Mon Sep 17 00:00:00 2001 From: Matt Willis <88482857+mew-nsc@users.noreply.github.com> Date: Sat, 11 Dec 2021 08:45:35 +0000 Subject: [PATCH 1/2] Initial cut of the more generic jchat importer -> Failure tests not passing yet --- importers/jchat_importer.py | 19 ++++++++----------- .../jchat_files/combined_format.html | 6 ++++++ tests/test_load_jchat.py | 8 +++++--- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/importers/jchat_importer.py b/importers/jchat_importer.py index c08d7952c..5933c804c 100644 --- a/importers/jchat_importer.py +++ b/importers/jchat_importer.py @@ -122,21 +122,24 @@ def _read_message_div(self, div, data_store, datafile, change_id): # Older format uses id= # Newer format uses msgid= msg_id = div.attrib["msgid"] # Grabbing ID to help with error reporting - version = JCHAT_MODERN except KeyError: try: msg_id = div.attrib["id"] - version = JCHAT_LEGACY except KeyError: # Ignore any non-comment messages (e.g. connect/disconnect) return - time_element = div.find("{*}tt/font") - # Sample data included some "Marker" messages with the id="marker" if str.upper(msg_id) == "MARKER": return # Ignore these messages + text_blocks = [] + text_blocks.append([item for item in div.findall(".//*") if item.text]) + if text_blocks[0]: + time_element = text_blocks[0][0] + platform_element = text_blocks[0][1] + msg_content_element = text_blocks[0][2:] + if time_element is None: self.errors.append( {self.error_type: f"Unable to read message {msg_id}. No timestamp provided"} @@ -147,11 +150,6 @@ def _read_message_div(self, div, data_store, datafile, change_id): timestamp = self.parse_timestamp(time_string, msg_id) time_element.record(self.name, "timestamp", timestamp) - if version == JCHAT_LEGACY: - platform_element = div.find("{*}b/a/font") - else: # version == JCHAT_MODERN - platform_element = div.find("{*}b/font/a") - if platform_element is None: self.errors.append( {self.error_type: f"Unable to read message {msg_id}. No platform provided"} @@ -162,14 +160,13 @@ def _read_message_div(self, div, data_store, datafile, change_id): # Match on quadgraphs platform = self.get_cached_platform_from_quad(data_store, platform_quad, change_id) - msg_content_element = [element for element in div.iterfind("font")] - if not msg_content_element: self.errors.append( {self.error_type: f"Unable to read message {msg_id}. No message provided"} ) return msg_content = self.parse_message_content(msg_content_element) + print(msg_content) if not msg_content: self.errors.append({self.error_type: f"Unable to parse JChat message {msg_id}."}) return diff --git a/tests/sample_data/jchat_files/combined_format.html b/tests/sample_data/jchat_files/combined_format.html index 505db8e57..dd1e2e1df 100644 --- a/tests/sample_data/jchat_files/combined_format.html +++ b/tests/sample_data/jchat_files/combined_format.html @@ -22,5 +22,11 @@