Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added error handle to orphaned objects monitoring #247

Merged
merged 4 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 30 additions & 20 deletions ch_tools/chadmin/cli/object_storage_group.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
from datetime import timedelta
from typing import Optional

from click import Choice, Context, group, option, pass_context
from humanfriendly import format_size

from ch_tools.chadmin.cli.chadmin_group import Chadmin
from ch_tools.chadmin.internal.object_storage.orphaned_objects_state import (
OrphanedObjectsState,
)
from ch_tools.chadmin.internal.zookeeper import (
check_zk_node,
create_zk_nodes,
Expand All @@ -23,7 +25,6 @@
# Use big enough timeout for stream HTTP query
STREAM_TIMEOUT = 10 * 60

ORPHANED_OBJECTS_SIZE_FIELD = "orphaned_objects_size"
STATE_LOCAL_PATH = "/tmp/object_storage_cleanup_state.json"


Expand Down Expand Up @@ -139,37 +140,46 @@ def clean_command(
"""
Clean orphaned S3 objects.
"""
deleted, total_size = clean(
ctx,
object_name_prefix,
from_time,
to_time,
CleanScope(clean_scope),
cluster_name,
dry_run,
keep_paths,
use_saved_list,
)
deleted = 0
total_size = 0
error_msg = ""

try:
deleted, total_size = clean(
ctx,
object_name_prefix,
from_time,
to_time,
CleanScope(clean_scope),
cluster_name,
dry_run,
keep_paths,
use_saved_list,
)
except Exception as e:
error_msg = str(e)

state = OrphanedObjectsState(total_size, error_msg)

if store_state_zk_path:
_store_state_zk_save(ctx, store_state_zk_path, total_size)
_store_state_zk_save(ctx, store_state_zk_path, state)

if store_state_local:
_store_state_local_save(ctx, total_size)
_store_state_local_save(ctx, state)

_print_response(ctx, dry_run, deleted, total_size)


def _store_state_zk_save(ctx: Context, path: str, total_size: int) -> None:
def _store_state_zk_save(ctx: Context, path: str, state: OrphanedObjectsState) -> None:
if not check_zk_node(ctx, path):
create_zk_nodes(ctx, [path], make_parents=True)
state_data = json.dumps({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, indent=4)
update_zk_nodes(ctx, [path], state_data.encode("utf-8"))
state_data = state.to_json().encode("utf-8")
update_zk_nodes(ctx, [path], state_data)


def _store_state_local_save(_: Context, total_size: int) -> None:
def _store_state_local_save(_: Context, state: OrphanedObjectsState) -> None:
with open(STATE_LOCAL_PATH, "w", encoding="utf-8") as file:
json.dump({ORPHANED_OBJECTS_SIZE_FIELD: total_size}, file, indent=4)
file.write(state.to_json())


def _print_response(ctx: Context, dry_run: bool, deleted: int, total_size: int) -> None:
Expand Down
19 changes: 19 additions & 0 deletions ch_tools/chadmin/internal/object_storage/orphaned_objects_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
from dataclasses import asdict, dataclass


@dataclass
class OrphanedObjectsState:
orphaned_objects_size: int
error_msg: str

@classmethod
def from_json(cls, json_str: str) -> "OrphanedObjectsState":
data = json.loads(json_str)
return cls(
orphaned_objects_size=data["orphaned_objects_size"],
error_msg=data["error_msg"],
)

def to_json(self) -> str:
return json.dumps(asdict(self), indent=4)
69 changes: 40 additions & 29 deletions ch_tools/monrun_checks/ch_orphaned_objects.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json

import click

from ch_tools.chadmin.cli.object_storage_group import (
ORPHANED_OBJECTS_SIZE_FIELD,
STATE_LOCAL_PATH,
from ch_tools.chadmin.cli.object_storage_group import STATE_LOCAL_PATH
from ch_tools.chadmin.internal.object_storage.orphaned_objects_state import (
OrphanedObjectsState,
)
from ch_tools.chadmin.internal.zookeeper import check_zk_node, get_zk_node
from ch_tools.chadmin.internal.zookeeper import get_zk_node
from ch_tools.common.result import CRIT, OK, WARNING, Result


Expand Down Expand Up @@ -48,19 +46,24 @@ def orphaned_objects_command(
) -> Result:
_check_mutually_exclusive(state_local, state_zk_path)

total_size = 0
if state_zk_path:
total_size = _zk_get_total_size(ctx, state_zk_path)
try:
state = _get_orphaned_objects_state(ctx, state_local, state_zk_path)
except Exception as e:
return Result(CRIT, str(e))

if state_local:
total_size = _local_get_total_size()
total_size = state.orphaned_objects_size
error_msg = state.error_msg

msg = f"Total size: {total_size}"
if error_msg != "":
return Result(CRIT, error_msg)

result_msg = f"Total size: {total_size}"
if total_size >= crit:
return Result(CRIT, msg)
return Result(CRIT, result_msg)
if total_size >= warn:
return Result(WARNING, msg)
return Result(OK, msg)
return Result(WARNING, result_msg)

return Result(OK, result_msg)


def _check_mutually_exclusive(state_local, state_zk_path):
Expand All @@ -75,20 +78,28 @@ def _check_mutually_exclusive(state_local, state_zk_path):
)


def _local_get_total_size() -> int:
try:
with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file:
total_size = json.load(file).get(ORPHANED_OBJECTS_SIZE_FIELD)
except FileNotFoundError:
total_size = 0
def _get_orphaned_objects_state(
ctx: click.Context, state_local: bool, state_zk_path: str
) -> "OrphanedObjectsState":
if state_local:
state = _local_get_orphaned_objects_state()

if state_zk_path:
state = _zk_get_orphaned_objects_state(ctx, state_zk_path)

return total_size
if state is None:
raise FileNotFoundError()

return state

def _zk_get_total_size(ctx: click.Context, state_zk_path: str) -> int:
total_size = 0
if check_zk_node(ctx, state_zk_path):
total_size = json.loads(get_zk_node(ctx, state_zk_path)).get(
ORPHANED_OBJECTS_SIZE_FIELD
)
return total_size

def _local_get_orphaned_objects_state() -> "OrphanedObjectsState":
with open(STATE_LOCAL_PATH, mode="r", encoding="utf-8") as file:
return OrphanedObjectsState.from_json(file.read())


def _zk_get_orphaned_objects_state(
ctx: click.Context, state_zk_path: str
) -> "OrphanedObjectsState":
zk_data = get_zk_node(ctx, state_zk_path)
return OrphanedObjectsState.from_json(zk_data)
11 changes: 11 additions & 0 deletions tests/features/monrun.feature
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,14 @@ Feature: ch-monitoring tool
"""
1;Unknown error: One of these options must be provided: --state-local, --state-zk-path
"""

Scenario: Check clickhouse orphaned objects with not empty error_msg
When we create file /tmp/object_storage_cleanup_state.json with data "{ \"orphaned_objects_size\": 0, \"error_msg\": \"ERROR\" }"
And we execute command on clickhouse01
"""
ch-monitoring orphaned-objects --state-local
"""
Then we get response
"""
2;ERROR
"""
12 changes: 8 additions & 4 deletions tests/features/object_storage.feature
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ Feature: chadmin object-storage commands
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 0
"orphaned_objects_size": 0,
"error_msg": ""
}
"""
When we put object in S3
Expand All @@ -174,7 +175,8 @@ Feature: chadmin object-storage commands
Then we get zookeeper node with "/tmp/shard_1" path
"""
{
"orphaned_objects_size": 10
"orphaned_objects_size": 10,
"error_msg": ""
}
"""

Expand All @@ -186,7 +188,8 @@ Feature: chadmin object-storage commands
Then we get file /tmp/object_storage_cleanup_state.json
"""
{
"orphaned_objects_size": 0
"orphaned_objects_size": 0,
"error_msg": ""
}
"""
When we put object in S3
Expand All @@ -202,6 +205,7 @@ Feature: chadmin object-storage commands
Then we get file /tmp/object_storage_cleanup_state.json
"""
{
"orphaned_objects_size": 10
"orphaned_objects_size": 10,
"error_msg": ""
}
"""
14 changes: 10 additions & 4 deletions tests/steps/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,21 @@ def step_get_response_not_contains(context, entry):
assert_that(context.response, is_not(contains_string(entry))) # type: ignore


@when('we create file {file_path} with data "{data}"')
def step_create_file(context, file_path, data):
container = docker.get_container(context, "clickhouse01")
result = container.exec_run(
["bash", "-c", f'echo "{data}" > {file_path}'], user="root"
)
assert result.exit_code == 0


@then("we get file {file_path}")
def step_get_file(context, file_path):
container = docker.get_container(context, "clickhouse01")
context.command = context.text.strip()
result = container.exec_run(["bash", "-c", f"cat {file_path}"], user="root")
context.response = result.output.decode().strip()
context.exit_code = result.exit_code
if context.exit_code != 0:
assert_that("", equal_to(context.text))
assert result.exit_code == 0
assert_that(context.response, equal_to(context.text))


Expand Down
Loading