mercedes-benz · andlaus · Jan 21, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
diff --git a/examples/somersault.pdx b/examples/somersault.pdx
diff --git a/examples/somersault_modified.pdx b/examples/somersault_modified.pdx
diff --git a/examples/somersaultecu.py b/examples/somersaultecu.py
@@ -1733,11 +1733,10 @@ class SomersaultSID(IntEnum):
                 ValueParameter(
                     oid=None,
                     short_name="schroedinger_param",
-                    long_name=None,
+                    long_name="Parameter where the DOP changes dending on how you "
+                    "look at the SNREF to it",
                     semantic=None,
-                    description=Description.from_string(
-                        "Parameter where the DOP changes dending on how you "
-                        "look at the SNREF to it"),
+                    description=None,
                     physical_default_value_raw=None,
                     byte_position=0,
                     dop_ref=None,

diff --git a/odxtools/decodestate.py b/odxtools/decodestate.py
@@ -1,10 +1,9 @@
 # SPDX-License-Identifier: MIT
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
-import odxtools.exceptions as exceptions
-
-from .exceptions import DecodeError
+from .encoding import Encoding
+from .exceptions import DecodeError, odxassert, odxraise, strict_mode
 from .odxtypes import AtomicOdxType, DataType, ParameterValue
 
 try:
@@ -54,8 +53,10 @@ class DecodeState:
 
     def extract_atomic_value(
         self,
+        *,
         bit_length: int,
         base_data_type: DataType,
+        base_type_encoding: Optional[Encoding],
         is_highlow_byte_order: bool,
     ) -> AtomicOdxType:
         """Extract an internal value from a blob of raw bytes.
@@ -68,6 +69,13 @@ def extract_atomic_value(
         if bit_length == 0:
             return base_data_type.python_type()
 
+        if base_data_type == DataType.A_FLOAT32 and bit_length != 32:
+            odxraise("The bit length of FLOAT32 values must be 32 bits")
+            bit_length = 32
+        elif base_data_type == DataType.A_FLOAT64 and bit_length != 64:
+            odxraise("The bit length of FLOAT64 values must be 64 bits")
+            bit_length = 64
+
         byte_length = (bit_length + self.cursor_bit_position + 7) // 8
         if self.cursor_byte_position + byte_length > len(self.coded_message):
             raise DecodeError(f"Expected a longer message.")
@@ -87,32 +95,125 @@ def extract_atomic_value(
             extracted_bytes = extracted_bytes[::-1]
 
         padding = (8 - (bit_length + self.cursor_bit_position) % 8) % 8
-        internal_value, = bitstruct.unpack_from(
+        raw_value, = bitstruct.unpack_from(
             f"{base_data_type.bitstruct_format_letter}{bit_length}",
             extracted_bytes,
             offset=padding)
-
-        text_errors = 'strict' if exceptions.strict_mode else 'replace'
-        if base_data_type == DataType.A_ASCIISTRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            # The spec says ASCII, meaning only byte values 0-127.
-            # But in practice, vendors use iso-8859-1, aka latin-1
-            # reason being iso-8859-1 never fails since it has a valid
-            # character mapping for every possible byte sequence.
-            text_encoding = 'iso-8859-1'
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
-        elif base_data_type == DataType.A_UTF8STRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            text_encoding = "utf-8"
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
-        elif base_data_type == DataType.A_UNICODE2STRING:
-            assert isinstance(internal_value, (bytes, bytearray))
-            # For UTF-16, we need to manually decode the extracted
-            # bytes to a string
-            text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
-            internal_value = internal_value.decode(encoding=text_encoding, errors=text_errors)
+        internal_value: AtomicOdxType
+
+        # Deal with raw byte fields, ...
+        if base_data_type == DataType.A_BYTEFIELD:
+            odxassert(
+                base_type_encoding is None or
+                base_type_encoding in (Encoding.NONE, Encoding.BCD_P, Encoding.BCD_UP))
+
+            # note that we do not ensure that BCD-encoded byte fields
+            # only represent "legal" values
+            internal_value = raw_value
+
+        # ... string types, ...
+        elif base_data_type in (DataType.A_UTF8STRING, DataType.A_ASCIISTRING,
+                                DataType.A_UNICODE2STRING):
+            # note that the spec disallows certain combinations of
+            # base_data_type and encoding (e.g., A_ASCIISTRING encoded
+            # using UTF-8). Since in python3 strings are always
+            # capable of the full unicode character set, odxtools
+            # ignores these restrictions...
+            text_errors = 'strict' if strict_mode else 'replace'
+            if base_type_encoding == Encoding.UTF8 or (base_data_type == DataType.A_UTF8STRING and
+                                                       base_type_encoding is None):
+                internal_value = raw_value.decode("utf-8", errors=text_errors)
+            elif base_type_encoding == Encoding.UCS2 or (base_data_type == DataType.A_UNICODE2STRING
+                                                         and base_type_encoding is None):
+                text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
+                internal_value = raw_value.decode(text_encoding, errors=text_errors)
+            elif base_type_encoding == Encoding.ISO_8859_1 or (
+                    base_data_type == DataType.A_ASCIISTRING and base_type_encoding is None):
+                internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
+            elif base_type_encoding == Encoding.ISO_8859_2:
+                internal_value = raw_value.decode("iso-8859-2", errors=text_errors)
+            elif base_type_encoding == Encoding.WINDOWS_1252:
+                internal_value = raw_value.decode("cp1252", errors=text_errors)
+            else:
+                odxraise(f"Specified illegal encoding {base_type_encoding} for string object")
+                internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
+
+        # ... signed integers, ...
+        elif base_data_type == DataType.A_INT32:
+            if not isinstance(raw_value, int):
+                odxraise(f"Raw value must be of integer type, not {type(raw_value).__name__}")
+
+            if base_type_encoding == Encoding.ONEC:
+                # ones' complement
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    # python's `~x` is -(x + 1) for its unbounded integers, so
+                    # invert within bit_length bits by hand and negate the result.
+                    internal_value = -((1 << bit_length) - raw_value - 1)
+            elif base_type_encoding == Encoding.TWOC or base_type_encoding is None:
+                # two's complement
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    internal_value = -((1 << bit_length) - raw_value)
+            elif base_type_encoding == Encoding.SM:
+                # sign-magnitude
+                sign_bit = 1 << (bit_length - 1)
+                if raw_value < sign_bit:
+                    internal_value = raw_value
+                else:
+                    internal_value = -(raw_value - sign_bit)
+            else:
+                odxraise(f"Illegal encoding ({base_type_encoding}) specified for "
+                         f"{base_data_type.value}")
+
+                internal_value = raw_value
+
+        # ... unsigned integers, ...
+        elif base_data_type == DataType.A_UINT32:
+            if not isinstance(raw_value, int) or raw_value < 0:
+                odxraise(f"Raw value must be a positive integer, not {raw_value}")
+
+            if base_type_encoding == Encoding.BCD_P:
+                # packed BCD
+                tmp2 = raw_value
+                internal_value = 0
+                factor = 1
+                while tmp2 > 0:
+                    internal_value += (tmp2 & 0xf) * factor
+                    factor *= 10
+                    tmp2 >>= 4
+            elif base_type_encoding == Encoding.BCD_UP:
+                # unpacked BCD
+                tmp2 = raw_value
+                internal_value = 0
+                factor = 1
+                while tmp2 > 0:
+                    internal_value += (tmp2 & 0xf) * factor
+                    factor *= 10
+                    tmp2 >>= 8
+            elif base_type_encoding in (None, Encoding.NONE):
+                # no encoding
+                internal_value = raw_value
+            else:
+                odxraise(f"Illegal encoding ({base_type_encoding}) specified for "
+                         f"{base_data_type.value}")
+
+                internal_value = raw_value
+
+        # ... and others (floating point values)
+        else:
+            odxassert(base_data_type in (DataType.A_FLOAT32, DataType.A_FLOAT64))
+            odxassert(
+                base_type_encoding in (None, Encoding.NONE),
+                f"Specified illegal encoding '{base_type_encoding}' for float object")
+
+            internal_value = float(raw_value)
 
         self.cursor_byte_position += byte_length
         self.cursor_bit_position = 0
 
-        return cast(AtomicOdxType, internal_value)
+        return internal_value
diff --git a/odxtools/diagcodedtype.py b/odxtools/diagcodedtype.py
@@ -5,6 +5,7 @@
 
 from .decodestate import DecodeState
 from .encodestate import EncodeState
+from .encoding import Encoding
 from .exceptions import odxassert, odxraise, odxrequire
 from .odxlink import OdxDocFragment, OdxLinkDatabase, OdxLinkId
 from .odxtypes import AtomicOdxType, DataType, odxstr_to_bool
@@ -23,7 +24,7 @@
 class DiagCodedType:
 
     base_data_type: DataType
-    base_type_encoding: Optional[str]
+    base_type_encoding: Optional[Encoding]
     is_highlow_byte_order_raw: Optional[bool]
 
     @staticmethod
@@ -36,7 +37,13 @@ def from_et(et_element: ElementTree.Element,
             odxraise(f"Unknown base data type {base_data_type_str}")
             base_data_type = cast(DataType, None)
 
-        base_type_encoding = et_element.get("BASE-TYPE-ENCODING")
+        base_type_encoding = None
+        if (base_type_encoding_str := et_element.get("BASE-TYPE-ENCODING")) is not None:
+            try:
+                base_type_encoding = Encoding(base_type_encoding_str)
+            except ValueError:
+                odxraise(f"Encountered unknown BASE-TYPE-ENCODING '{base_type_encoding_str}'")
+
         is_highlow_byte_order_raw = odxstr_to_bool(et_element.get("IS-HIGHLOW-BYTE-ORDER"))
 
         return DiagCodedType(