Skip to content

Commit

Permalink
Merge pull request #1113 from debrief/1109_new_jchat_format
Browse files Browse the repository at this point in the history
1109 new jchat format
  • Loading branch information
mew-nsc authored Dec 14, 2021
2 parents e25e594 + bb8d8b5 commit 60ad4cd
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 30 deletions.
35 changes: 12 additions & 23 deletions importers/jchat_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,54 +122,43 @@ def _read_message_div(self, div, data_store, datafile, change_id):
# Older format uses id=
# Newer format uses msgid=
msg_id = div.attrib["msgid"] # Grabbing ID to help with error reporting
version = JCHAT_MODERN
except KeyError:
try:
msg_id = div.attrib["id"]
version = JCHAT_LEGACY
except KeyError:
# Ignore any non-comment messages (e.g. connect/disconnect)
return

time_element = div.find("{*}tt/font")

# Sample data included some "Marker" messages with the id="marker"
if str.upper(msg_id) == "MARKER":
return # Ignore these messages

if time_element is None:
text_blocks = []
text_blocks.append([item for item in div.findall(".//*") if item.text])
if text_blocks[0]:
time_element = text_blocks[0][0]
platform_element = text_blocks[0][1]
msg_content_element = text_blocks[0][2:]

if not text_blocks[0] or len(text_blocks[0]) < 3:
self.errors.append(
{self.error_type: f"Unable to read message {msg_id}. No timestamp provided"}
{
self.error_type: f"Unable to read message {msg_id}. Not enough parts (expecting timestamp, platform, message)"
}
)
return

time_string = time_element.text.strip("[").strip("]")
timestamp = self.parse_timestamp(time_string, msg_id)
time_element.record(self.name, "timestamp", timestamp)

if version == JCHAT_LEGACY:
platform_element = div.find("{*}b/a/font")
else: # version == JCHAT_MODERN
platform_element = div.find("{*}b/font/a")

if platform_element is None:
self.errors.append(
{self.error_type: f"Unable to read message {msg_id}. No platform provided"}
)
return
platform_quad = platform_element.text[0:4]
platform_element.record(self.name, "platform", platform_quad)
# Match on quadgraphs
platform = self.get_cached_platform_from_quad(data_store, platform_quad, change_id)

msg_content_element = [element for element in div.iterfind("font")]

if not msg_content_element:
self.errors.append(
{self.error_type: f"Unable to read message {msg_id}. No message provided"}
)
return
msg_content = self.parse_message_content(msg_content_element)

if not msg_content:
self.errors.append({self.error_type: f"Unable to parse JChat message {msg_id}."})
return
Expand Down
6 changes: 6 additions & 0 deletions tests/sample_data/jchat_files/combined_format.html
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,11 @@
<div id="34abc=34537">
<tt><font>[08010809A]</font></tt><b><a href=""><font>ABCD_CMD</font></a></b><font><i>Legacy - font a swap - has i tag - no breaks</i></font>
</div>
<div msgid="34bbb=34236">
<font><tt>[23112654A]</tt></font><font><b><a href="">SPLA_AB</a></font></b><font>Modern 2 - <br>no i tag<br>but has multiple<br>breaks</font>
</div>
<div msgid="34bbb=34235">
<font><tt>[06010709A]</tt></font><font><b><a href="">SPLB_XO</a></font></b><font>Modern 2 - no i tag - no breaks</font>
</div>
</body>
</html>
19 changes: 12 additions & 7 deletions tests/test_load_jchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,11 @@ def test_combined_format(self):
with self.store.session_scope():
# there must be states after the import
comments = self.store.session.query(self.store.db_classes.Comment).all()
assert len(comments) == 5
assert len(comments) == 7

# there must be platforms after the import
platforms = self.store.session.query(self.store.db_classes.Platform).all()
assert len(platforms) == 3
assert len(platforms) == 5

# there must be one datafile afterwards
datafiles = self.store.session.query(self.store.db_classes.Datafile).all()
Expand All @@ -510,12 +510,14 @@ def test_combined_format(self):
.order_by(self.store.db_classes.Comment.time)
.all()
)
assert len(results) == 5
assert len(results) == 7
assert results[0].content == "Modern - has i tag"
assert results[1].content == "Modern - no i tag but has multiple breaks"
assert results[2].content == "Modern - no i tag - no breaks"
assert results[3].content == "Legacy - font a swap - no i tag - no breaks"
assert results[4].content == "Legacy - font a swap - has i tag - no breaks"
assert results[5].content == "Modern 2 - no i tag but has multiple breaks"
assert results[6].content == "Modern 2 - no i tag - no breaks"

def test_invalid_missing_timestamp(self):
html_string = """<html>
Expand All @@ -537,7 +539,7 @@ def test_invalid_missing_timestamp(self):

check_errors_for_file_contents(
html_string,
"Unable to read message 34544=34534. No timestamp provided",
"Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)",
importer,
"no_timestamp.html",
)
Expand Down Expand Up @@ -588,7 +590,7 @@ def test_invalid_missing_platform(self):

check_errors_for_file_contents(
html_string,
"Unable to read message 34544=34534. No platform provided",
"Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)",
importer,
"no_platform.html",
)
Expand All @@ -613,7 +615,7 @@ def test_invalid_missing_message(self):

check_errors_for_file_contents(
html_string,
"Unable to read message 34544=34534. No message provided",
"Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)",
importer,
"no_message",
)
Expand All @@ -638,7 +640,10 @@ def test_empty_message(self):
importer = JChatImporter()

check_errors_for_file_contents(
html_string, "Unable to parse JChat message 34544=34534.", importer, "no_message"
html_string,
"Unable to read message 34544=34534. Not enough parts (expecting timestamp, platform, message)",
importer,
"no_message",
)

@staticmethod
Expand Down

0 comments on commit 60ad4cd

Please sign in to comment.