From 2650598e839a853c70c5ec19253e2e86e40f63bc Mon Sep 17 00:00:00 2001
From: Peder Hovdan Andresen <107681714+pederhan@users.noreply.github.com>
Date: Thu, 10 Aug 2023 13:16:12 +0200
Subject: [PATCH] Migrate to Pydantic v2 (#64)

* Migrate to pydantic v2

* Fix pydantic version in setup.cfg

* Remove use of `pydantic.Extra`

Uses string literals instead

* Fix deprecated ValidationError import path

* Do not test msg and ctx of pydantic messages

https://docs.pydantic.dev/latest/version-compatibility/#pydantic-v2-changes

* Sort imports

* Use model_dump_json()

* Revalidate hosts after modification
---
 setup.cfg                                     |   2 +-
 tests/test_config.py                          |  11 +-
 tests/test_models.py                          |  61 ++++-----
 .../test_sourcecollectorprocess.py            |   4 +-
 zabbix_auto_config/__init__.py                |  12 +-
 zabbix_auto_config/models.py                  | 123 ++++++++----------
 zabbix_auto_config/processing.py              |  21 +--
 7 files changed, 113 insertions(+), 121 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 9e39bf7..777a927 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,7 +24,7 @@ python_requires = >=3.8
 install_requires =
     multiprocessing-logging == 0.3.1
     psycopg2
-    pydantic >=1.0.0,<2.0.0
+    pydantic >= 2.0.0
     pyzabbix
     requests
     tomli
diff --git a/tests/test_config.py b/tests/test_config.py
index 90a16c2..4297ac8 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,7 +2,7 @@
 import tomli
 
 import pytest
-from pydantic import Extra, ValidationError
+from pydantic import ValidationError
 import zabbix_auto_config.models as models
 
 
@@ -28,13 +28,13 @@ def test_config_extra_field_allowed(
     config["foo"] = "bar"
 
     # Allow extra fields for this test
-    original_extra = models.Settings.__config__.extra
+    original_extra = models.Settings.model_config["extra"]
     try:
-        models.Settings.__config__.extra = Extra.allow
+        models.Settings.model_config["extra"] = "allow"
         models.Settings(**config)
         assert len(caplog.records) == 0
     finally:
-        models.Settings.__config__.extra = original_extra
+        models.Settings.model_config["extra"] = original_extra
 
 
 def test_sourcecollectorsettings_defaults():
@@ -108,7 +108,6 @@ def test_sourcecollectorsettings_duration_too_short():
     errors = exc_info.value.errors()
     assert len(errors) == 1
     error = errors[0]
-    assert error["loc"] == ("error_duration",)
     assert "greater than 300" in error["msg"]
     assert error["type"] == "value_error"
 
@@ -126,4 +125,4 @@ def test_sourcecollectorsettings_duration_negative():
     assert len(errors) == 1
     error = errors[0]
     assert error["loc"] == ("error_duration",)
-    assert error["type"] == "value_error.number.not_ge"
+    assert error["type"] == "greater_than_equal"
diff --git a/tests/test_models.py b/tests/test_models.py
index 721b820..213734b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,8 +1,13 @@
 import pytest
-from pydantic.error_wrappers import ValidationError
+from pydantic import ValidationError
 from zabbix_auto_config import models
 
 
+# NOTE: Do not test msg and ctx of Pydantic errors!
+# They are not guaranteed to be stable between minor versions.
+# https://docs.pydantic.dev/latest/version-compatibility/#pydantic-v2-changes
+
+
 def find_host_by_hostname(hosts, hostname):
     for host in hosts:
         if host["hostname"].startswith(hostname):
@@ -24,53 +29,49 @@ def test_invalid_proxy_pattern(invalid_hosts):
     host = find_host_by_hostname(invalid_hosts, "invalid-proxy-pattern")
     with pytest.raises(ValidationError) as exc_info:
         models.Host(**host)
-    assert exc_info.value.errors() == [
-        {
-            "loc": ("proxy_pattern",),
-            "msg": "Must be valid regexp pattern: '['",
-            "type": "assertion_error",
-        }
-    ]
+    errors = exc_info.value.errors()
+    assert len(errors) == 1
+    error = errors[0]
+    assert error["loc"] == ("proxy_pattern",)
+    assert "Must be valid regexp pattern: '['" in error["msg"]
+    assert error["type"] == "assertion_error"
 
 
 def test_invalid_interface(invalid_hosts):
     host = find_host_by_hostname(invalid_hosts, "invalid-interface")
     with pytest.raises(ValidationError) as exc_info:
         models.Host(**host)
-    assert exc_info.value.errors() == [
-        {
-            "loc": ("interfaces", 0, "type"),
-            "msg": "Interface of type 2 must have details set",
-            "type": "value_error",
-        }
-    ]
+    errors = exc_info.value.errors()
+    assert len(errors) == 1
+    error = errors[0]
+    assert error["loc"] == ("interfaces", 0)
+    assert "Interface of type 2 must have details set" in error["msg"]
+    assert error["type"] == "value_error"
 
 
 def test_duplicate_interface(invalid_hosts):
     host = find_host_by_hostname(invalid_hosts, "duplicate-interface")
     with pytest.raises(ValidationError) as exc_info:
         models.Host(**host)
-    assert exc_info.value.errors() == [
-        {
-            "loc": ("interfaces",),
-            "msg": "No duplicate interface types: [1, 1]",
-            "type": "assertion_error",
-        }
-    ]
+    errors = exc_info.value.errors()
+    assert len(errors) == 1
+    error = errors[0]
+    assert error["loc"] == ("interfaces",)
+    assert "No duplicate interface types: [1, 1]" in error["msg"]
+    assert error["type"] == "assertion_error"
+
 
 
 def test_invalid_importance(invalid_hosts):
     host = find_host_by_hostname(invalid_hosts, "invalid-importance")
     with pytest.raises(ValidationError) as exc_info:
         models.Host(**host)
-    assert exc_info.value.errors() == [
-        {
-            "loc": ("importance",),
-            "msg": "ensure this value is greater than or equal to 0",
-            "type": "value_error.number.not_ge",
-            "ctx": {"limit_value": 0},
-        }
-    ]
+    errors = exc_info.value.errors()
+    assert len(errors) == 1
+    error = errors[0]
+    assert error["loc"] == ("importance",)
+    assert error["input"] == -1
+    assert error["type"] == "greater_than_equal"
 
 
 
diff --git a/tests/test_processing/test_sourcecollectorprocess.py b/tests/test_processing/test_sourcecollectorprocess.py
index 2e3f15d..b841063 100644
--- a/tests/test_processing/test_sourcecollectorprocess.py
+++ b/tests/test_processing/test_sourcecollectorprocess.py
@@ -64,7 +64,7 @@ def test_source_collector_disable_on_failure():
         module=FaultySourceCollector,
         config=SourceCollectorSettings(
             module_name="faulty_source_collector",
-            update_interval=0.1,
+            update_interval=1,
             disable_duration=3600,
             exit_on_error=False,
             error_duration=10,
@@ -77,7 +77,7 @@ def test_source_collector_disable_on_failure():
     # if we terminate the process before it has the chance to
     # set state["ok"] to False, the test will fail.
     process.start()
-    time.sleep(0.5)  # wait for process to start
+    time.sleep(1.5)  # wait for process to start
     process.stop_event.set()
     process.join(timeout=0.5)
 
diff --git a/zabbix_auto_config/__init__.py b/zabbix_auto_config/__init__.py
index 77ba5d9..ee1ecac 100644
--- a/zabbix_auto_config/__init__.py
+++ b/zabbix_auto_config/__init__.py
@@ -120,9 +120,6 @@ def main():
     multiprocessing_logging.install_mp_handler()
     logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
 
-    if config.zac.health_file is not None:
-        health_file = os.path.abspath(config.zac.health_file)
-
     logging.info("Main start (%d) version %s", os.getpid(), __version__)
 
     stop_event = multiprocessing.Event()
@@ -165,8 +162,13 @@ def main():
 
         while not stop_event.is_set():
             if next_status < datetime.datetime.now():
-                if health_file is not None:
-                    write_health(health_file, processes, source_hosts_queues, config.zabbix.failsafe)
+                if config.zac.health_file is not None:
+                    write_health(
+                        config.zac.health_file,
+                        processes,
+                        source_hosts_queues,
+                        config.zabbix.failsafe,
+                    )
                 log_process_status(processes)
                 next_status = datetime.datetime.now() + datetime.timedelta(seconds=status_interval)
 
diff --git a/zabbix_auto_config/models.py b/zabbix_auto_config/models.py
index fa1c6c1..a0b3e62 100644
--- a/zabbix_auto_config/models.py
+++ b/zabbix_auto_config/models.py
@@ -1,43 +1,31 @@
 import logging
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
-from typing import (
-    Any,
-    Dict,
-    List,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
-
-from pydantic import (
-    BaseModel,
-    BaseSettings as PydanticBaseSettings,
-    Field,
-    conint,
-    root_validator,
-    validator,
-    Extra,
-)
-from pydantic.fields import ModelField
+from pydantic import BaseModel
+from pydantic import BaseModel as PydanticBaseModel
+from pydantic import ConfigDict, Field, field_validator, model_validator
+from typing_extensions import Annotated
 
 from . import utils
 
 # TODO: Models aren't validated when making changes to a set/list. Why? How to handle?
 
 
-class BaseSettings(PydanticBaseSettings, extra=Extra.ignore):
+class ConfigBaseModel(PydanticBaseModel, extra="ignore"):
+    """Base class for all config models. Warns if unknown fields are passed in."""
     # https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
 
-    @root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def _check_unknown_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         """Checks for unknown fields and logs a warning if any are found.
         Does not log warnings if extra is set to `Extra.allow`.
         """
-        if cls.__config__.extra == Extra.allow:
+        if cls.model_config["extra"] == "allow":
             return values
         for key in values:
-            if key not in cls.__fields__:
+            if key not in cls.model_fields:
                 logging.warning(
                     "%s: Got unknown config field '%s'.",
                     getattr(cls, "__name__", str(cls)),
@@ -46,7 +34,7 @@ def _check_unknown_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         return values
 
 
-class ZabbixSettings(BaseSettings):
+class ZabbixSettings(ConfigBaseModel):
     map_dir: str
     url: str
     username: str
@@ -72,15 +60,14 @@ class ZabbixSettings(BaseSettings):
     # These groups are not managed by ZAC beyond creating them.
     extra_siteadmin_hostgroup_prefixes: Set[str] = set()
 
-class ZacSettings(BaseSettings):
+class ZacSettings(ConfigBaseModel):
     source_collector_dir: str
     host_modifier_dir: str
     db_uri: str
+    health_file: Optional[Path] = None
 
-    health_file: str = None
 
-
-class SourceCollectorSettings(BaseSettings, extra=Extra.allow):
+class SourceCollectorSettings(ConfigBaseModel, extra="allow"):
     module_name: str
     update_interval: int
     error_tolerance: int = Field(
@@ -106,25 +93,25 @@ class SourceCollectorSettings(BaseSettings, extra=Extra.allow):
         ge=0,
     )
 
-    @validator("error_duration")
-    def _validate_error_duration_is_greater(cls, v: int, values: Dict[str, Any]) -> int:
-        error_tolerance = values["error_tolerance"]
-        update_interval = values["update_interval"]
+    @model_validator(mode="after")
+    def _validate_error_duration_is_greater(self) -> "SourceCollectorSettings":
         # If no tolerance, we don't need to be concerned with how long errors
         # are kept on record, because a single error will disable the collector.
-        if error_tolerance <= 0:
+        if self.error_tolerance <= 0:
             # hack to ensure RollingErrorCounter.count() doesn't discard the error
             # before it is counted
-            return 9999
-        elif (product := error_tolerance * update_interval) > v:
+            self.error_duration = 9999
+        elif (
+            product := self.error_tolerance * self.update_interval
+        ) > self.error_duration:
             raise ValueError(
-                f"Invalid value for error_duration ({v}). It should be greater than error_tolerance ({error_tolerance}) "
-                f"times update_interval ({update_interval}), i.e., greater than {product}. Please adjust accordingly."
+                f"Invalid value for error_duration ({self.error_duration}). It should be greater than error_tolerance ({self.error_tolerance}) "
+                f"times update_interval ({self.update_interval}), i.e., greater than {product}. Please adjust accordingly."
             )
-        return v
+        return self
 
 
-class Settings(BaseSettings):
+class Settings(ConfigBaseModel):
     zac: ZacSettings
     zabbix: ZabbixSettings
     source_collectors: Dict[str, SourceCollectorSettings]
@@ -135,57 +122,55 @@ class Interface(BaseModel):
     endpoint: str
     port: str  # Ports could be macros, i.e. strings
     type: int
+    model_config = ConfigDict(validate_assignment=True)
 
-    class Config:
-        validate_assignment = True
-
-    # These validators look static, but pydantic uses the class argument.
-    # pylint: disable=no-self-use, no-self-argument
-    @validator("type")
-    def type_2_must_have_details(cls, v, values):
-        if v == 2 and not values["details"]:
+    @model_validator(mode="after")
+    def type_2_must_have_details(self) -> "Interface":
+        if self.type == 2 and not self.details:
             raise ValueError("Interface of type 2 must have details set")
-        return v
+        return self
 
 
 class Host(BaseModel):
     enabled: bool
     hostname: str
 
-    importance: Optional[conint(ge=0)]  # type: ignore # mypy blows up: https://github.com/pydantic/pydantic/issues/239 & https://github.com/pydantic/pydantic/issues/156
+    importance: Optional[Annotated[int, Field(ge=0)]] = None
     interfaces: List[Interface] = []
     inventory: Dict[str, str] = {}
     macros: Optional[None] = None  # TODO: What should macros look like?
     properties: Set[str] = set()
-    proxy_pattern: Optional[str]  # NOTE: replace with Optional[typing.Pattern]?
+    proxy_pattern: Optional[str] = None  # NOTE: replace with Optional[typing.Pattern]?
     siteadmins: Set[str] = set()
     sources: Set[str] = set()
     tags: Set[Tuple[str, str]] = set()
+    model_config = ConfigDict(validate_assignment=True, revalidate_instances="always")
 
-    class Config:
-        validate_assignment = True
-
-    # pylint: disable=no-self-use, no-self-argument
-    @validator("*", pre=True)
-    def none_defaults_to_field_default(cls, v: Any, field: ModelField) -> Any:
-        """The field's default value or factory is returned if the value is None."""
+    @model_validator(mode="before")
+    @classmethod
+    def none_defaults_to_field_default(cls, data: Any) -> Any:
+        """The field's default value or factory is used if the value is None."""
         # TODO: add field type check
-        if v is None:
-            if field.default is not None:
-                return field.default
-            elif field.default_factory is not None:
-                return field.default_factory()
-        return v
-
-    # pylint: disable=no-self-use, no-self-argument
-    @validator("interfaces")
+        if not isinstance(data, dict):
+            return data  # pydantic will handle the error
+        for field_name, value in data.items():
+            if value is None:
+                field = cls.model_fields[field_name]
+                if field.default is not None:
+                    data[field_name] = field.default
+                elif field.default_factory is not None:
+                    data[field_name] = field.default_factory()
+        return data
+
+    @field_validator("interfaces")
+    @classmethod
     def no_duplicate_interface_types(cls, v: List[Interface]) -> List[Interface]:
         types = [interface.type for interface in v]
         assert len(types) == len(set(types)), f"No duplicate interface types: {types}"
         return v
 
-    # pylint: disable=no-self-use, no-self-argument
-    @validator("proxy_pattern")
+    @field_validator("proxy_pattern")
+    @classmethod
     def must_be_valid_regexp_pattern(cls, v: Optional[str]) -> Optional[str]:
         if v is not None:
             assert utils.is_valid_regexp(v), f"Must be valid regexp pattern: {v!r}"
diff --git a/zabbix_auto_config/processing.py b/zabbix_auto_config/processing.py
index a56a081..ec99e8d 100644
--- a/zabbix_auto_config/processing.py
+++ b/zabbix_auto_config/processing.py
@@ -113,8 +113,8 @@ def __init__(
         self.update_interval = self.config.update_interval
 
         # Pop off the config fields from the config we pass to the module
-        self.collector_config = config.dict()
-        for key in self.config.__fields__:
+        self.collector_config = config.model_dump()
+        for key in self.config.model_fields:
             self.collector_config.pop(key, None)
 
         # Repeated errors will disable the source
@@ -266,13 +266,14 @@ def handle_source_host(
                 # logging.debug(f"Replaced host <{host['hostname']}> from source <{source}>")
                 cursor.execute(
                     f"UPDATE {self.db_source_table} SET data = %s WHERE data->>'hostname' = %s AND data->'sources' ? %s",
-                    [host.json(), host.hostname, source],
+                    [host.model_dump_json(), host.hostname, source],
                 )
                 return HostAction.UPDATE
         else:
             # logging.debug(f"Inserted host <{host['hostname']}> from source <{source}>")
             cursor.execute(
-                f"INSERT INTO {self.db_source_table} (data) VALUES (%s)", [host.json()]
+                f"INSERT INTO {self.db_source_table} (data) VALUES (%s)",
+                [host.model_dump_json()],
             )
             return HostAction.INSERT
 
@@ -408,14 +409,17 @@ def handle_host(
 
         for host_modifier in self.host_modifiers:
             try:
-                modified_host = host_modifier["module"].modify(host.copy(deep=True))
+                modified_host = host_modifier["module"].modify(
+                    host.model_copy(deep=True)
+                )
                 assert isinstance(
                     modified_host, models.Host
                 ), f"Modifier returned invalid type: {type(modified_host)}"
                 assert (
                     host.hostname == modified_host.hostname
                 ), f"Modifier changed the hostname, '{host.hostname}' -> '{modified_host.hostname}'"
-                host = modified_host
+                # Re-validate the host after modification
+                host = host.model_validate(modified_host)
             except AssertionError as e:
                 logging.warning(
                     "Host, '%s', was modified to be invalid by modifier: '%s'. Error: %s",
@@ -440,13 +444,14 @@ def handle_host(
                 # logging.debug(f"Replaced host <{host['hostname']}> from source <{source}>")
                 cursor.execute(
                     f"UPDATE {self.db_hosts_table} SET data = %s WHERE data->>'hostname' = %s",
-                    [host.json(), host.hostname],
+                    [host.model_dump_json(), host.hostname],
                 )
                 return HostAction.UPDATE
         else:
             # logging.debug(f"Inserted host <{host['hostname']}> from source <{source}>")
             cursor.execute(
-                f"INSERT INTO {self.db_hosts_table} (data) VALUES (%s)", [host.json()]
+                f"INSERT INTO {self.db_hosts_table} (data) VALUES (%s)",
+                [host.model_dump_json()],
             )
             return HostAction.INSERT