Skip to content

Commit

Permalink
Added error handle to orphaned objects monitoring (#247)
Browse files Browse the repository at this point in the history
* Added ORPHANED_OBJECTS_ERROR_MSG_FIELD

* Update old tests

* Refactoring

* Add OrphanedObjectsState

---------

Co-authored-by: Nikita Unisikhin <nik.unisikhin.04@gmail.com>
  • Loading branch information
NikitaUnisikhin and Nikita Unisikhin authored Nov 5, 2024
1 parent d7ca9d3 commit d49b7c9
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 57 deletions.
50 changes: 30 additions & 20 deletions ch_tools/chadmin/cli/object_storage_group.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
from datetime import timedelta
from typing import Optional

from click import Choice, Context, group, option, pass_context
from humanfriendly import format_size

from ch_tools.chadmin.cli.chadmin_group import Chadmin
from ch_tools.chadmin.internal.object_storage.orphaned_objects_state import (
OrphanedObjectsState,
)
from ch_tools.chadmin.internal.zookeeper import (
check_zk_node,
create_zk_nodes,
Expand All @@ -23,7 +25,6 @@
# Use big enough timeout for stream HTTP query
STREAM_TIMEOUT = 10 * 60

ORPHANED_OBJECTS_SIZE_FIELD = "orphaned_objects_size"
STATE_LOCAL_PATH = "/tmp/object_storage_cleanup_state.json"


Expand Down Expand Up @@ -139,37 +140,46 @@ def clean_command(
"""
Clean orphaned S3 objects.
"""
deleted, total_size = clean(
ctx,
object_name_prefix,
from_time,
to_time,
CleanScope(clean_scope),
cluster_name,
dry_run,
keep_paths,
use_saved_list,
)
deleted = 0
total_size = 0
error_msg = ""

try:
deleted, total_size = clean(
ctx,
object_name_prefix,
from_time,
to_time,
CleanScope(clean_scope),
cluster_name,
dry_run,
keep_paths,
use_saved_list,
)
except Exception as e:
error_msg = str(e)

state = OrphanedObjectsState(total_size, error_msg)

if store_state_zk_path:
_store_state_zk_save(ctx, store_state_zk_path, total_size)
_store_state_zk_save(ctx, store_state_zk_path, state)

if store_state_local:
_store_state_local_save(ctx, total_size)
_store_state_local_save(ctx, state)

_print_response(ctx, dry_run, deleted, total_size)


def _store_state_zk_save(ctx: Context, path: str, total_size: int) -> None:
def _store_state_zk_save(ctx: Context, path: str, state: OrphanedObjectsState) -> None:
if not check_zk_node(ctx, path):
create_zk_nodes(ctx, [path], make_parents=True)
state_data = json.dumps({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, indent=4)
update_zk_nodes(ctx, [path], state_data.encode("utf-8"))
state_data = state.to_json().encode("utf-8")
update_zk_nodes(ctx, [path], state_data)


def _store_state_local_save(_: Context, total_size: int) -> None:
def _store_state_local_save(_: Context, state: OrphanedObjectsState) -> None:
with open(STATE_LOCAL_PATH, "w", encoding="utf-8") as file:
json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4)
file.write(state.to_json())


def _print_response(ctx: Context, dry_run: bool, deleted: int, total_size: int) -> None:
Expand Down
19 changes: 19 additions & 0 deletions ch_tools/chadmin/internal/object_storage/orphaned_objects_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
from dataclasses import asdict, dataclass


@dataclass
class OrphanedObjectsState:
orphaned_objects_size: int
error_msg: str

@classmethod
def from_json(cls, json_str: str) -> "OrphanedObjectsState":
data = json.loads(json_str)
return cls(
orphaned_objects_size=data["orphaned_objects_size"],
error_msg=data["error_msg"],
)

def to_json(self) -> str:
return json.dumps(asdict(self), indent=4)
69 changes: 40 additions & 29 deletions ch_tools/monrun_checks/ch_orphaned_objects.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json

import click

from ch_tools.chadmin.cli.object_storage_group import (
ORPHANED_OBJECTS_SIZE_FIELD,
STATE_LOCAL_PATH,
from ch_tools.chadmin.cli.object_storage_group import STATE_LOCAL_PATH
from ch_tools.chadmin.internal.object_storage.orphaned_objects_state import (
OrphanedObjectsState,
)
from ch_tools.chadmin.internal.zookeeper import check_zk_node, get_zk_node
from ch_tools.chadmin.internal.zookeeper import get_zk_node
from ch_tools.common.result import CRIT, OK, WARNING, Result


Expand Down Expand Up @@ -48,19 +46,24 @@ def orphaned_objects_command(
) -> Result:
_check_mutually_exclusive(state_local, state_zk_path)

total_size = 0
if state_zk_path:
total_size = _zk_get_total_size(ctx, state_zk_path)
try:
state = _get_orphaned_objects_state(ctx, state_local, state_zk_path)
except Exception as e:
return Result(CRIT, str(e))

if state_local:
total_size = _local_get_total_size()
total_size = state.orphaned_objects_size
error_msg = state.error_msg

msg = f"Total size: {total_size}"
if error_msg != "":
return Result(CRIT, error_msg)

result_msg = f"Total size: {total_size}"
if total_size >= crit:
return Result(CRIT, msg)
return Result(CRIT, result_msg)
if total_size >= warn:
return Result(WARNING, msg)
return Result(OK, msg)
return Result(WARNING, result_msg)

return Result(OK, result_msg)


def _check_mutually_exclusive(state_local, state_zk_path):
Expand All @@ -75,20 +78,28 @@ def _check_mutually_exclusive(state_local, state_zk_path):
)


def _local_get_total_size() -> int:
try:
with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file:
total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD)
except FileNotFoundError:
total_size = 0
def _get_orphaned_objects_state(
ctx: click.Context, state_local: bool, state_zk_path: str
) -> "OrphanedObjectsState":
if state_local:
state = _local_get_orphaned_objects_state()

if state_zk_path:
state = _zk_get_orphaned_objects_state(ctx, state_zk_path)

return total_size
if state is None:
raise FileNotFoundError()

return state

def _zk_get_total_size(ctx: click.Context, state_zk_path: str) -> int:
total_size = 0
if check_zk_node(ctx, state_zk_path):
total_size = json.loads(get_zk_node(ctx, state_zk_path)).get(
ORPHANED_OBJECTS_SIZE_FIELD
)
return total_size

def _local_get_orphaned_objects_state() -> "OrphanedObjectsState":
with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file:
return OrphanedObjectsState.from_json(file.read())


def _zk_get_orphaned_objects_state(
ctx: click.Context, state_zk_path: str
) -> "OrphanedObjectsState":
zk_data = get_zk_node(ctx, state_zk_path)
return OrphanedObjectsState.from_json(zk_data)
11 changes: 11 additions & 0 deletions tests/features/monrun.feature
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,14 @@ Feature: ch-monitoring tool
"""
1;Unknown error: One of these options must be provided: --state-local, --state-zk-path
"""

Scenario: Check clickhouse orphaned objects with not empty error_msg
When we create file /tmp/object_storage_cleanup_state.json with data "{ \"orphaned_objects_size\": 0, \"error_msg\": \"ERROR\" }"
And we execute command on clickhouse01
"""
ch-monitoring orphaned-objects --state-local
"""
Then we get response
"""
2;ERROR
"""
12 changes: 8 additions & 4 deletions tests/features/object_storage.feature
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ Feature: chadmin object-storage commands
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 0
"orphaned_objects_size": 0,
"error_msg": ""
}
"""
When we put object in S3
Expand All @@ -174,7 +175,8 @@ Feature: chadmin object-storage commands
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 10
"orphaned_objects_size": 10,
"error_msg": ""
}
"""

Expand All @@ -186,7 +188,8 @@ Feature: chadmin object-storage commands
Then we get file /tmp/object_storage_cleanup_state.json
"""
{
"orphaned_objects_size": 0
"orphaned_objects_size": 0,
"error_msg": ""
}
"""
When we put object in S3
Expand All @@ -202,6 +205,7 @@ Feature: chadmin object-storage commands
Then we get file /tmp/object_storage_cleanup_state.json
"""
{
"orphaned_objects_size": 10
"orphaned_objects_size": 10,
"error_msg": ""
}
"""
14 changes: 10 additions & 4 deletions tests/steps/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,21 @@ def step_get_response_not_contains(context, entry):
assert_that(context.response, is_not(contains_string(entry))) # type: ignore


@when('we create file {file_path} with data "{data}"')
def step_create_file(context, file_path, data):
container = docker.get_container(context, "clickhouse01")
result = container.exec_run(
["bash", "-c", f'echo "{data}" > {file_path}'], user="root"
)
assert result.exit_code == 0


@then("we get file {file_path}")
def step_get_file(context, file_path):
container = docker.get_container(context, "clickhouse01")
context.command = context.text.strip()
result = container.exec_run(["bash", "-c", f"cat {file_path}"], user="root")
context.response = result.output.decode().strip()
context.exit_code = result.exit_code
if context.exit_code != 0:
assert_that("", equal_to(context.text))
assert result.exit_code == 0
assert_that(context.response, equal_to(context.text))


Expand Down

0 comments on commit d49b7c9

Please sign in to comment.