Skip to content

Commit

Permalink
Implement all encodings of base data type
Browse files Browse the repository at this point in the history
For integers, this means packed and unpacked BCD, two's complement,
ones' complement and sign-magnitude. Strings can be encoded using UTF-8,
UTF-16 (big or little endian), ISO 8859-1 (latin1), ISO 8859-2 (latin2)
and Windows code page 1252. Finally, floating point values can be
encoded using the 32 and 64 bit IEEE 754 representations.

Be aware that I'm not sure if the bit mask is handled correctly for
negative integers (or even if there is a "correct" handling of bit
masks for negative integers). Also, I suspect that most ODX
implementations encountered in the wild behave slightly differently
for non-byte-aligned negative integers...

Signed-off-by: Andreas Lauser <andreas.lauser@mercedes-benz.com>
Signed-off-by: Christian Hackenbeck <christian.hackenbeck@mercedes-benz.com>
  • Loading branch information
andlaus committed Jan 17, 2025
1 parent d6ae041 commit 06f5e9a
Show file tree
Hide file tree
Showing 14 changed files with 617 additions and 113 deletions.
151 changes: 125 additions & 26 deletions odxtools/decodestate.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: MIT
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

import odxtools.exceptions as exceptions

from .exceptions import DecodeError
from .encoding import Encoding
from .exceptions import DecodeError, odxassert, odxraise, strict_mode
from .odxtypes import AtomicOdxType, DataType, ParameterValue

try:
Expand Down Expand Up @@ -54,8 +53,10 @@ class DecodeState:

def extract_atomic_value(
self,
*,
bit_length: int,
base_data_type: DataType,
base_type_encoding: Optional[Encoding],
is_highlow_byte_order: bool,
) -> AtomicOdxType:
"""Extract an internal value from a blob of raw bytes.
Expand All @@ -68,6 +69,11 @@ def extract_atomic_value(
if bit_length == 0:
return base_data_type.python_type()

if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
odxraise("The bit length of FLOAT32 values must be 32 bits")
elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
odxraise("The bit length of FLOAT64 values must be 64 bits")

byte_length = (bit_length + self.cursor_bit_position + 7) // 8
if self.cursor_byte_position + byte_length > len(self.coded_message):
raise DecodeError(f"Expected a longer message.")
Expand All @@ -87,32 +93,125 @@ def extract_atomic_value(
extracted_bytes = extracted_bytes[::-1]

padding = (8 - (bit_length + self.cursor_bit_position) % 8) % 8
internal_value, = bitstruct.unpack_from(
raw_value, = bitstruct.unpack_from(
f"{base_data_type.bitstruct_format_letter}{bit_length}",
extracted_bytes,
offset=padding)

text_errors = 'strict' if exceptions.strict_mode else 'replace'
if base_data_type == DataType.A_ASCIISTRING:
assert isinstance(internal_value, (bytes, bytearray))
# The spec says ASCII, meaning only byte values 0-127.
# But in practice, vendors use iso-8859-1, aka latin-1
# reason being iso-8859-1 never fails since it has a valid
# character mapping for every possible byte sequence.
text_encoding = 'iso-8859-1'
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
elif base_data_type == DataType.A_UTF8STRING:
assert isinstance(internal_value, (bytes, bytearray))
text_encoding = "utf-8"
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
elif base_data_type == DataType.A_UNICODE2STRING:
assert isinstance(internal_value, (bytes, bytearray))
# For UTF-16, we need to manually decode the extracted
# bytes to a string
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
internal_value: AtomicOdxType

# Deal with raw byte fields, ...
if base_data_type == DataType.A_BYTEFIELD:
odxassert(base_type_encoding is None or base_type_encoding == Encoding.NONE)

internal_value = raw_value

# ... string types, ...
elif base_data_type in (DataType.A_UTF8STRING, DataType.A_ASCIISTRING,
DataType.A_UNICODE2STRING):
text_errors = 'strict' if strict_mode else 'replace'
if base_type_encoding == Encoding.UTF8:
internal_value = raw_value.decode("utf-8", errors=text_errors)
elif base_type_encoding == Encoding.UCS2:
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
internal_value = raw_value.decode(text_encoding, errors=text_errors)
elif base_type_encoding == Encoding.ISO_8859_1:
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
elif base_type_encoding == Encoding.ISO_8859_2:
internal_value = raw_value.decode("iso-8859-2", errors=text_errors)
elif base_type_encoding == Encoding.WINDOWS_1252:
internal_value = raw_value.decode("cp1252", errors=text_errors)
else:
odxassert(
base_type_encoding in (None, Encoding.NONE),
f"Specified illegal encoding {base_type_encoding} for string object")

# if no encoding has been specified explicitly, we
# make assumptions by looking at the data type
if base_data_type == DataType.A_UTF8STRING:
internal_value = raw_value.decode("utf-8", errors=text_errors)
elif base_data_type == DataType.A_UNICODE2STRING:
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
internal_value = raw_value.decode(text_encoding, errors=text_errors)
else:
odxassert(base_data_type == DataType.A_ASCIISTRING)
# The spec says ASCII, meaning only character
# values 0-127. In practice, vendors use
# iso-8859-1, aka latin-1, because iso-8859-1
# never fails since it has a valid character
# mapping for every possible value
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)

# ... integers, ...
elif base_data_type in (DataType.A_INT32, DataType.A_UINT32):
if not isinstance(raw_value, int):
odxraise(f"Raw value must be of integer type, not {type(raw_value).__name__}")

# BCD encodings
if base_type_encoding == Encoding.BCD_P:
# packed BCD
tmp2 = raw_value
internal_value = 0
factor = 1
while tmp2 > 0:
internal_value += (tmp2 & 0xf) * factor
factor *= 10
tmp2 >>= 4
elif base_type_encoding == Encoding.BCD_UP:
# unpacked BCD
tmp2 = raw_value
internal_value = 0
factor = 1
while tmp2 > 0:
internal_value += (tmp2 & 0xf) * factor
factor *= 10
tmp2 >>= 8
elif base_type_encoding == Encoding.ONEC:
# one-complement
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
# python defines the bitwise inversion of a
# positive integer value x as ~x = -(x + 1).
internal_value = -((1 << bit_length) - raw_value - 1)
elif base_type_encoding == Encoding.TWOC:
# two-complement
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
internal_value = -((1 << bit_length) - raw_value)
elif base_type_encoding == Encoding.SM:
# sign-magnitude
sign_bit = 1 << (bit_length - 1)
if raw_value < sign_bit:
internal_value = raw_value
else:
internal_value = -(raw_value - sign_bit)
else:
# None specified
odxassert(
base_type_encoding in (None, Encoding.NONE),
f"Unhandled integer encoding '{base_type_encoding}'")
internal_value = raw_value

# ... and others (floating point values)
else:
odxassert(base_data_type in (DataType.A_FLOAT32, DataType.A_FLOAT64))
odxassert(
base_type_encoding in (None, Encoding.NONE),
f"Specified illegal encoding '{base_type_encoding}' for float object")

if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
odxraise(f"Illegal bit length for a float32 object ({bit_length})")
bit_length = 32
elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
odxraise(f"Illegal bit length for a float64 object ({bit_length})")
bit_length = 32

internal_value = float(raw_value)

self.cursor_byte_position += byte_length
self.cursor_bit_position = 0

return cast(AtomicOdxType, internal_value)
return internal_value
11 changes: 9 additions & 2 deletions odxtools/diagcodedtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from .decodestate import DecodeState
from .encodestate import EncodeState
from .encoding import Encoding
from .exceptions import odxassert, odxraise, odxrequire
from .odxlink import OdxDocFragment, OdxLinkDatabase, OdxLinkId
from .odxtypes import AtomicOdxType, DataType, odxstr_to_bool
Expand All @@ -23,7 +24,7 @@
class DiagCodedType:

base_data_type: DataType
base_type_encoding: Optional[str]
base_type_encoding: Optional[Encoding]
is_highlow_byte_order_raw: Optional[bool]

@staticmethod
Expand All @@ -36,7 +37,13 @@ def from_et(et_element: ElementTree.Element,
odxraise(f"Unknown base data type {base_data_type_str}")
base_data_type = cast(DataType, None)

base_type_encoding = et_element.get("BASE-TYPE-ENCODING")
base_type_encoding = None
if (base_type_encoding_str := et_element.get("BASE-TYPE-ENCODING")) is not None:
try:
base_type_encoding = Encoding(base_type_encoding_str)
except ValueError:
odxraise(f"Encountered unknown BASE-TYPE-ENCODING '{base_type_encoding_str}'")

is_highlow_byte_order_raw = odxstr_to_bool(et_element.get("IS-HIGHLOW-BYTE-ORDER"))

return DiagCodedType(
Expand Down
Loading

0 comments on commit 06f5e9a

Please sign in to comment.