Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement all encodings of base data type #381

Merged
merged 9 commits into from
Jan 21, 2025
Binary file modified examples/somersault.pdx
Binary file not shown.
Binary file modified examples/somersault_modified.pdx
Binary file not shown.
7 changes: 3 additions & 4 deletions examples/somersaultecu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1733,11 +1733,10 @@ class SomersaultSID(IntEnum):
ValueParameter(
oid=None,
short_name="schroedinger_param",
long_name=None,
long_name="Parameter where the DOP changes dending on how you "
"look at the SNREF to it",
semantic=None,
description=Description.from_string(
"Parameter where the DOP changes dending on how you "
"look at the SNREF to it"),
description=None,
physical_default_value_raw=None,
byte_position=0,
dop_ref=None,
Expand Down
153 changes: 127 additions & 26 deletions odxtools/decodestate.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: MIT
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

import odxtools.exceptions as exceptions

from .exceptions import DecodeError
from .encoding import Encoding
from .exceptions import DecodeError, odxassert, odxraise, strict_mode
from .odxtypes import AtomicOdxType, DataType, ParameterValue

try:
Expand Down Expand Up @@ -54,8 +53,10 @@ class DecodeState:

def extract_atomic_value(
self,
*,
bit_length: int,
base_data_type: DataType,
base_type_encoding: Optional[Encoding],
is_highlow_byte_order: bool,
) -> AtomicOdxType:
"""Extract an internal value from a blob of raw bytes.
Expand All @@ -68,6 +69,13 @@ def extract_atomic_value(
if bit_length == 0:
return base_data_type.python_type()

if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
odxraise("The bit length of FLOAT32 values must be 32 bits")
bit_length = 32
elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
odxraise("The bit length of FLOAT64 values must be 64 bits")
bit_length = 64

byte_length = (bit_length + self.cursor_bit_position + 7) // 8
if self.cursor_byte_position + byte_length > len(self.coded_message):
raise DecodeError(f"Expected a longer message.")
Expand All @@ -87,32 +95,125 @@ def extract_atomic_value(
extracted_bytes = extracted_bytes[::-1]

padding = (8 - (bit_length + self.cursor_bit_position) % 8) % 8
internal_value, = bitstruct.unpack_from(
raw_value, = bitstruct.unpack_from(
f"{base_data_type.bitstruct_format_letter}{bit_length}",
extracted_bytes,
offset=padding)

text_errors = 'strict' if exceptions.strict_mode else 'replace'
if base_data_type == DataType.A_ASCIISTRING:
assert isinstance(internal_value, (bytes, bytearray))
# The spec says ASCII, meaning only byte values 0-127.
# But in practice, vendors use iso-8859-1, aka latin-1
# reason being iso-8859-1 never fails since it has a valid
# character mapping for every possible byte sequence.
text_encoding = 'iso-8859-1'
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
elif base_data_type == DataType.A_UTF8STRING:
assert isinstance(internal_value, (bytes, bytearray))
text_encoding = "utf-8"
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
elif base_data_type == DataType.A_UNICODE2STRING:
assert isinstance(internal_value, (bytes, bytearray))
# For UTF-16, we need to manually decode the extracted
# bytes to a string
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
internal_value: AtomicOdxType

# Deal with raw byte fields, ...
if base_data_type == DataType.A_BYTEFIELD:
odxassert(
base_type_encoding is None or
base_type_encoding in (Encoding.NONE, Encoding.BCD_P, Encoding.BCD_UP))
andlaus marked this conversation as resolved.
Show resolved Hide resolved

# note that we do not ensure that BCD-encoded byte fields
# only represent "legal" values
internal_value = raw_value

# ... string types, ...
elif base_data_type in (DataType.A_UTF8STRING, DataType.A_ASCIISTRING,
DataType.A_UNICODE2STRING):
# note that the spec disallows certain combinations of
# base_data_type and encoding (e.g., A_ASCIISTRING encoded
# using UTF-8). Since in python3 strings are always
# capable of the full unicode character set, odxtools
# ignores these restrictions...
text_errors = 'strict' if strict_mode else 'replace'
if base_type_encoding == Encoding.UTF8 or (base_data_type == DataType.A_UTF8STRING and
base_type_encoding is None):
internal_value = raw_value.decode("utf-8", errors=text_errors)
elif base_type_encoding == Encoding.UCS2 or (base_data_type == DataType.A_UNICODE2STRING
and base_type_encoding is None):
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
internal_value = raw_value.decode(text_encoding, errors=text_errors)
elif base_type_encoding == Encoding.ISO_8859_1 or (
base_data_type == DataType.A_ASCIISTRING and base_type_encoding is None):
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
elif base_type_encoding == Encoding.ISO_8859_2:
internal_value = raw_value.decode("iso-8859-2", errors=text_errors)
elif base_type_encoding == Encoding.WINDOWS_1252:
internal_value = raw_value.decode("cp1252", errors=text_errors)
else:
odxraise(f"Specified illegal encoding {base_type_encoding} for string object")
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)

# ... signed integers, ...
elif base_data_type == DataType.A_INT32:
if not isinstance(raw_value, int):
odxraise(f"Raw value must be of integer type, not {type(raw_value).__name__}")
andlaus marked this conversation as resolved.
Show resolved Hide resolved

if base_type_encoding == Encoding.ONEC:
# one-complement
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
# python defines the bitwise inversion of a
# positive integer value x as ~x = -(x + 1).
internal_value = -((1 << bit_length) - raw_value - 1)
elif base_type_encoding == Encoding.TWOC or base_type_encoding is None:
andlaus marked this conversation as resolved.
Show resolved Hide resolved
# two-complement
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
internal_value = -((1 << bit_length) - raw_value)
kayoub5 marked this conversation as resolved.
Show resolved Hide resolved
elif base_type_encoding == Encoding.SM:
# sign-magnitude
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
internal_value = -(raw_value - sign_bit)
else:
odxraise(f"Illegal encoding ({base_type_encoding}) specified for "
f"{base_data_type.value}")

internal_value = raw_value

# ... unsigned integers, ...
elif base_data_type == DataType.A_UINT32:
if not isinstance(raw_value, int) or raw_value < 0:
odxraise(f"Raw value must be a positive integer, not {raw_value}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would this happen?

Copy link
Member Author

@andlaus andlaus Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently it should not be possible because we always extract unsigned integers using bitstruct. This is mainly to prevent things from breaking if this gets changed in the future...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is mainly to prevent things from breaking if this gets changed in the future

That's what unit tests are for. I would not mind a few checks, but the function now has more checks than encoding/decoding logic.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, I'm rather indifferent about this. changed: 5c698b5


if base_type_encoding == Encoding.BCD_P:
# packed BCD
tmp2 = raw_value
internal_value = 0
factor = 1
while tmp2 > 0:
internal_value += (tmp2 & 0xf) * factor
factor *= 10
tmp2 >>= 4
elif base_type_encoding == Encoding.BCD_UP:
# unpacked BCD
tmp2 = raw_value
internal_value = 0
factor = 1
while tmp2 > 0:
internal_value += (tmp2 & 0xf) * factor
factor *= 10
tmp2 >>= 8
elif base_type_encoding in (None, Encoding.NONE):
# no encoding
internal_value = raw_value
else:
odxraise(f"Illegal encoding ({base_type_encoding}) specified for "
f"{base_data_type.value}")

internal_value = raw_value

# ... and others (floating point values)
else:
odxassert(base_data_type in (DataType.A_FLOAT32, DataType.A_FLOAT64))
odxassert(
base_type_encoding in (None, Encoding.NONE),
f"Specified illegal encoding '{base_type_encoding}' for float object")

internal_value = float(raw_value)

self.cursor_byte_position += byte_length
self.cursor_bit_position = 0

return cast(AtomicOdxType, internal_value)
return internal_value
11 changes: 9 additions & 2 deletions odxtools/diagcodedtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from .decodestate import DecodeState
from .encodestate import EncodeState
from .encoding import Encoding
from .exceptions import odxassert, odxraise, odxrequire
from .odxlink import OdxDocFragment, OdxLinkDatabase, OdxLinkId
from .odxtypes import AtomicOdxType, DataType, odxstr_to_bool
Expand All @@ -23,7 +24,7 @@
class DiagCodedType:

base_data_type: DataType
base_type_encoding: Optional[str]
base_type_encoding: Optional[Encoding]
is_highlow_byte_order_raw: Optional[bool]

@staticmethod
Expand All @@ -36,7 +37,13 @@ def from_et(et_element: ElementTree.Element,
odxraise(f"Unknown base data type {base_data_type_str}")
base_data_type = cast(DataType, None)

base_type_encoding = et_element.get("BASE-TYPE-ENCODING")
base_type_encoding = None
if (base_type_encoding_str := et_element.get("BASE-TYPE-ENCODING")) is not None:
try:
base_type_encoding = Encoding(base_type_encoding_str)
except ValueError:
odxraise(f"Encountered unknown BASE-TYPE-ENCODING '{base_type_encoding_str}'")

is_highlow_byte_order_raw = odxstr_to_bool(et_element.get("IS-HIGHLOW-BYTE-ORDER"))

return DiagCodedType(
Expand Down
Loading
Loading