Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Approve failsafe with temp file #75

Merged
merged 4 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@ source_collector_dir = "path/to/source_collector_dir/"
host_modifier_dir = "path/to/host_modifier_dir/"
db_uri = "dbname='zac' user='zabbix' host='localhost' password='secret' port=5432 connect_timeout=2"
log_level = "DEBUG"
# Health status for each ZAC process.
health_file = "/tmp/zac_health.json"
# File containing hostnames of hosts to add/remove when failsafe is reached.
failsafe_file = "/tmp/zac_failsafe.json"
# File to signal manual approval of adding/removing hosts when failsafe is reached.
# The file is automatically removed when the approval is consumed, before the changes are applied.
failsafe_ok_file = "/tmp/zac_failsafe_ok"

[zabbix]
map_dir = "path/to/map_dir/"
Expand Down
7 changes: 4 additions & 3 deletions zabbix_auto_config/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,12 @@ class ZacSettings(ConfigBaseModel):
source_collector_dir: str
host_modifier_dir: str
db_uri: str
health_file: Optional[Path] = None
log_level: int = Field(logging.DEBUG, description="The log level to use.")
health_file: Optional[Path] = None
failsafe_file: Optional[Path] = None
failsafe_ok_file: Optional[Path] = None

@field_validator("failsafe_file", "health_file", mode="after")
@field_validator("health_file", "failsafe_file", "failsafe_ok_file", mode="after")
@classmethod
def _validate_file_path(cls, v: Optional[Path], info: ValidationInfo) -> Optional[Path]:
if v is None:
Expand Down Expand Up @@ -283,4 +284,4 @@ class HostActions(BaseModel):

def write_json(self, path: Path) -> None:
    """Write a JSON-serialized representation of self to a file.

    Args:
        path: Destination path, passed unchanged to ``utils.write_file``.
    """
    # Serialize once and write once; the duplicated call was redundant.
    utils.write_file(path, self.model_dump_json(indent=2))
44 changes: 40 additions & 4 deletions zabbix_auto_config/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,22 @@ def set_tags(self, zabbix_host, tags):
else:
logging.info("DRYRUN: Setting tags (%s) on host: '%s' (%s)", tags, zabbix_host["host"], zabbix_host["hostid"])

def handle_failsafe_limit(self, to_add: List[str], to_remove: List[str]) -> None:
    """Handle a number of hosts to add/remove that exceeds the failsafe.

    If ``_check_failsafe_ok_file`` reports that the change was approved
    (the failsafe OK file existed and was removed), the method returns
    normally so the caller can proceed with the changes. Otherwise the
    failsafe condition is logged, the host lists are passed to
    ``write_failsafe_hosts`` and a ZACException is raised.

    Args:
        to_add: Hostnames that would be added.
        to_remove: Hostnames that would be removed.

    Raises:
        exceptions.ZACException: If the change has not been approved.
    """
    if self._check_failsafe_ok_file():
        return

    # Log before writing the failsafe file so the failsafe condition is
    # always recorded, even if writing the host lists raises.
    logging.warning(
        "Too many hosts to change (failsafe=%d). Remove: %d, Add: %d. Aborting",
        self.config.failsafe,
        len(to_remove),
        len(to_add),
    )
    self.write_failsafe_hosts(to_add, to_remove)
    raise exceptions.ZACException("Failsafe triggered")

def write_failsafe_hosts(self, to_add: List[str], to_remove: List[str]) -> None:
if not self.settings.zac.failsafe_file:
Expand All @@ -747,6 +763,25 @@ def write_failsafe_hosts(self, to_add: List[str], to_remove: List[str]) -> None:
self.settings.zac.failsafe_file,
)

def _check_failsafe_ok_file(self) -> bool:
if not self.settings.zac.failsafe_ok_file:
return False
if not self.settings.zac.failsafe_ok_file.exists():
logging.info(
"Failsafe OK file %s does not exist. Create it to approve changes.",
self.settings.zac.failsafe_ok_file,
)
return False
try:
self.settings.zac.failsafe_ok_file.unlink()
except OSError as e:
logging.error(
"Failsafe cannot be approved. Unable to delete failsafe OK file: %s", e
)
return False
logging.info("Failsafe OK file exists. Proceeding with changes.")
return True

def do_update(self):
with self.db_connection, self.db_connection.cursor() as db_cursor:
db_cursor.execute(f"SELECT data FROM {self.db_hosts_table} WHERE data->>'enabled' = 'true'")
Expand Down Expand Up @@ -795,10 +830,11 @@ def do_update(self):
logging.debug("Only in db: %s", " ".join(hostnames_to_add[:10]))
logging.debug("In both: %d", len(hostnames_in_both))

if len(hostnames_to_remove) > self.config.failsafe or len(hostnames_to_add) > self.config.failsafe:
logging.warning("Too many hosts to change (failsafe=%d). Remove: %d, Add: %d. Aborting", self.config.failsafe, len(hostnames_to_remove), len(hostnames_to_add))
self.write_failsafe_hosts(hostnames_to_add, hostnames_to_remove)
raise exceptions.ZACException("Failsafe triggered")
if (
len(hostnames_to_remove) > self.config.failsafe
or len(hostnames_to_add) > self.config.failsafe
):
self.handle_failsafe_limit(hostnames_to_add, hostnames_to_remove)

for hostname in hostnames_to_remove:
if self.stop_event.is_set():
Expand Down
Loading