Implement all encodings of base data type

For integers, this means packed and unpacked BCD, two's complement, ones' complement and sign-magnitude. Strings can be encoded using UTF-8, UTF-16 big or little endian, ISO 8859-1 (latin1), ISO 8859-2 (latin2) and Windows codepage 1252. Finally, floating point values can be encoded using the 32 and 64 bit IEEE 754 representations. Be aware that I'm not sure if the bit mask is handled correctly for negative integers (or even if there is a "correct" handling of bit masks for negative integers). Also, I suspect that most ODX implementations encountered in the wild behave slightly differently for non byte-aligned negative integers... Signed-off-by: Andreas Lauser <andreas.lauser@mercedes-benz.com> Signed-off-by: Christian Hackenbeck <christian.hackenbeck@mercedes-benz.com>
mercedes-benz · Jan 17, 2025 · 06f5e9a · 06f5e9a
1 parent d6ae041
commit 06f5e9a
Show file tree

Hide file tree

Showing 14 changed files with 617 additions and 113 deletions.
diff --git a/odxtools/decodestate.py b/odxtools/decodestate.py
@@ -1,10 +1,9 @@
 # SPDX-License-Identifier: MIT
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
-import odxtools.exceptions as exceptions
-
-from .exceptions import DecodeError
+from .encoding import Encoding
+from .exceptions import DecodeError, odxassert, odxraise, strict_mode
 from .odxtypes import AtomicOdxType, DataType, ParameterValue
 
 try:
@@ -54,8 +53,10 @@ class DecodeState:
 
     def extract_atomic_value(
         self,
+        *,
         bit_length: int,
         base_data_type: DataType,
+        base_type_encoding: Optional[Encoding],
         is_highlow_byte_order: bool,
     ) -> AtomicOdxType:
         """Extract an internal value from a blob of raw bytes.
@@ -68,6 +69,11 @@ def extract_atomic_value(
         if bit_length == 0:
             return base_data_type.python_type()
 
+        if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
+            odxraise("The bit length of FLOAT32 values must be 32 bits")
+        elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
+            odxraise("The bit length of FLOAT64 values must be 64 bits")
+
         byte_length = (bit_length + self.cursor_bit_position + 7) // 8
         if self.cursor_byte_position + byte_length > len(self.coded_message):
             raise DecodeError(f"Expected a longer message.")
@@ -87,32 +93,125 @@ def extract_atomic_value(
             extracted_bytes = extracted_bytes[::-1]
 
         padding = (8 - (bit_length + self.cursor_bit_position) % 8) % 8
-        internal_value, = bitstruct.unpack_from(
+        raw_value, = bitstruct.unpack_from(
             f"{base_data_type.bitstruct_format_letter}{bit_length}",
             extracted_bytes,
             offset=padding)
-
-        text_errors = 'strict' if exceptions.strict_mode else 'replace'
-        if base_data_type == DataType.A_ASCIISTRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            # The spec says ASCII, meaning only byte values 0-127.
-            # But in practice, vendors use iso-8859-1, aka latin-1
-            # reason being iso-8859-1 never fails since it has a valid
-            # character mapping for every possible byte sequence.
-            text_encoding = 'iso-8859-1'
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
-        elif base_data_type == DataType.A_UTF8STRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            text_encoding = "utf-8"
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
-        elif base_data_type == DataType.A_UNICODE2STRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            # For UTF-16, we need to manually decode the extracted
-            # bytes to a string
-            text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
+        internal_value: AtomicOdxType
+
+        # Deal with raw byte fields, ...
+        if base_data_type == DataType.A_BYTEFIELD:
+            odxassert(base_type_encoding is None or base_type_encoding == Encoding.NONE)
+
+            internal_value = raw_value
+
+        # ... string types, ...
+        elif base_data_type in (DataType.A_UTF8STRING, DataType.A_ASCIISTRING,
+                                DataType.A_UNICODE2STRING):
+            text_errors = 'strict' if strict_mode else 'replace'
+            if base_type_encoding == Encoding.UTF8:
+                internal_value = raw_value.decode("utf-8", errors=text_errors)
+            elif base_type_encoding == Encoding.UCS2:
+                text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
+                internal_value = raw_value.decode(text_encoding, errors=text_errors)
+            elif base_type_encoding == Encoding.ISO_8859_1:
+                internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
+            elif base_type_encoding == Encoding.ISO_8859_2:
+                internal_value = raw_value.decode("iso-8859-2", errors=text_errors)
+            elif base_type_encoding == Encoding.WINDOWS_1252:
+                internal_value = raw_value.decode("cp1252", errors=text_errors)
+            else:
+                odxassert(
+                    base_type_encoding in (None, Encoding.NONE),
+                    f"Specified illegal encoding {base_type_encoding} for string object")
+
+                # if no encoding has been specified explicitly, we
+                # make assumptions by looking at the data type
+                if base_data_type == DataType.A_UTF8STRING:
+                    internal_value = raw_value.decode("utf-8", errors=text_errors)
+                elif base_data_type == DataType.A_UNICODE2STRING:
+                    text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
+                    internal_value = raw_value.decode(text_encoding, errors=text_errors)
+                else:
+                    odxassert(base_data_type == DataType.A_ASCIISTRING)
+                    # The spec says ASCII, meaning only character
+                    # values 0-127.  In practice, vendors use
+                    # iso-8859-1, aka latin-1, because iso-8859-1
+                    # never fails since it has a valid character
+                    # mapping for every possible value
+                    internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
+
+        # ... integers, ...
+        elif base_data_type in (DataType.A_INT32, DataType.A_UINT32):
+            if not isinstance(raw_value, int):
+                odxraise(f"Raw value must be of integer type, not {type(raw_value).__name__}")
+
+            # BCD encodings
+            if base_type_encoding == Encoding.BCD_P:
+                # packed BCD
+                tmp2 = raw_value
+                internal_value = 0
+                factor = 1
+                while tmp2 > 0:
+                    internal_value += (tmp2 & 0xf) * factor
+                    factor *= 10
+                    tmp2 >>= 4
+            elif base_type_encoding == Encoding.BCD_UP:
+                # unpacked BCD
+                tmp2 = raw_value
+                internal_value = 0
+                factor = 1
+                while tmp2 > 0:
+                    internal_value += (tmp2 & 0xf) * factor
+                    factor *= 10
+                    tmp2 >>= 8
+            elif base_type_encoding == Encoding.ONEC:
+                # one-complement
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    # python defines the bitwise inversion of a
+                    # positive integer value x as ~x = -(x + 1).
+                    internal_value = -((1 << bit_length) - raw_value - 1)
+            elif base_type_encoding == Encoding.TWOC:
+                # two-complement
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    internal_value = -((1 << bit_length) - raw_value)
+            elif base_type_encoding == Encoding.SM:
+                # sign-magnitude
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    internal_value = -(raw_value - sign_bit)
+            else:
+                # None specified
+                odxassert(
+                    base_type_encoding in (None, Encoding.NONE),
+                    f"Unhandled integer encoding '{base_type_encoding}'")
+                internal_value = raw_value
+
+        # ... and others (floating point values)
+        else:
+            odxassert(base_data_type in (DataType.A_FLOAT32, DataType.A_FLOAT64))
+            odxassert(
+                base_type_encoding in (None, Encoding.NONE),
+                f"Specified illegal encoding '{base_type_encoding}' for float object")
+
+            if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
+                odxraise(f"Illegal bit length for a float32 object ({bit_length})")
+                bit_length = 32
+            elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
+                odxraise(f"Illegal bit length for a float64 object ({bit_length})")
+                bit_length = 32
+
+            internal_value = float(raw_value)
 
         self.cursor_byte_position += byte_length
         self.cursor_bit_position = 0
 
-        return cast(AtomicOdxType, internal_value)
+        return internal_value
diff --git a/odxtools/diagcodedtype.py b/odxtools/diagcodedtype.py
@@ -5,6 +5,7 @@
 
 from .decodestate import DecodeState
 from .encodestate import EncodeState
+from .encoding import Encoding
 from .exceptions import odxassert, odxraise, odxrequire
 from .odxlink import OdxDocFragment, OdxLinkDatabase, OdxLinkId
 from .odxtypes import AtomicOdxType, DataType, odxstr_to_bool
@@ -23,7 +24,7 @@
 class DiagCodedType:
 
     base_data_type: DataType
-    base_type_encoding: Optional[str]
+    base_type_encoding: Optional[Encoding]
     is_highlow_byte_order_raw: Optional[bool]
 
     @staticmethod
@@ -36,7 +37,13 @@ def from_et(et_element: ElementTree.Element,
             odxraise(f"Unknown base data type {base_data_type_str}")
             base_data_type = cast(DataType, None)
 
-        base_type_encoding = et_element.get("BASE-TYPE-ENCODING")
+        base_type_encoding = None
+        if (base_type_encoding_str := et_element.get("BASE-TYPE-ENCODING")) is not None:
+            try:
+                base_type_encoding = Encoding(base_type_encoding_str)
+            except ValueError:
+                odxraise(f"Encountered unknown BASE-TYPE-ENCODING '{base_type_encoding_str}'")
+
         is_highlow_byte_order_raw = odxstr_to_bool(et_element.get("IS-HIGHLOW-BYTE-ORDER"))
 
         return DiagCodedType(