Skip to content

Commit

Permalink
chore: add maintain and stats cli
Browse files Browse the repository at this point in the history
  • Loading branch information
smotornyuk committed Jun 25, 2024
1 parent 21a43e4 commit 3831390
Show file tree
Hide file tree
Showing 8 changed files with 323 additions and 8 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ default_install_hook_types:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
# - id: check-yaml
- id: end-of-file-fixer
Expand All @@ -25,14 +25,14 @@ repos:

## Black
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.4.2
hooks:
- id: black
stages: [pre-commit]

## Ruff
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.3.0
rev: v0.4.10
hooks:
- id: ruff
stages: [pre-commit]
2 changes: 1 addition & 1 deletion ckanext/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import ckan.plugins.toolkit as tk
from ckan.config.declaration import Declaration, Key

from . import config, exceptions, model, utils, types
from . import config, exceptions, model, types, utils


class PFileModel(Protocol):
Expand Down
4 changes: 3 additions & 1 deletion ckanext/files/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ckanext.files import base, config, exceptions, shared
from ckanext.files.model import File, Owner

from . import dev, migrate
from . import dev, maintain, migrate, stats

__all__ = [
"files",
Expand All @@ -27,6 +27,8 @@ def files():

files.add_command(dev.group, "dev")
files.add_command(migrate.group, "migrate")
files.add_command(stats.group, "stats")
files.add_command(maintain.group, "maintain")


@files.command()
Expand Down
171 changes: 171 additions & 0 deletions ckanext/files/cli/maintain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from __future__ import annotations

from datetime import datetime, timezone

import click
import sqlalchemy as sa

import ckan.plugins.toolkit as tk
from ckan import model

from ckanext.files import shared, utils


def _now():
return datetime.now(timezone.utc)


@click.group()
def group():
    """Storage maintenance."""
    # Intentionally empty: this function only acts as the container that the
    # maintenance subcommands below attach to via ``@group.command()``.
    # The docstring is shown by click as the group's help text.


# Reusable ``-s/--storage-name`` option shared by every command in this module.
# When the option is omitted the commands receive ``None`` and fall back to
# ``shared.config.default_storage()``.
storage_option = click.option(
"-s",
"--storage-name",
help="Name of the configured storage",
)


@group.command()
@storage_option
@click.option("--remove", is_flag=True, help="Remove files")
def empty_owner(storage_name: str | None, remove: bool):
    """Manage files that have no owner."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # storage_name: name of a configured storage; the default storage is used
    #     when the option is omitted.
    # remove: after listing the files, ask for confirmation and delete them.
    #
    # Aborts with ``click.Abort`` when the storage is unknown, or when
    # ``--remove`` is requested for a storage that cannot remove files.
    storage_name = storage_name or shared.config.default_storage()
    try:
        storage = shared.get_storage(storage_name)
    except shared.exc.UnknownStorageError as err:
        tk.error_shout(err)
        raise click.Abort from err

    if remove and not storage.supports(shared.Capability.REMOVE):
        tk.error_shout(f"Storage {storage_name} does not support file removal")
        raise click.Abort

    # Outer join + NULL check keeps only files without an owner record.
    stmt = (
        sa.select(shared.File)
        .outerjoin(shared.File.owner_info)
        .where(shared.File.storage == storage_name, shared.Owner.owner_id.is_(None))
    )

    # Run the query exactly once and keep the result. The files that get
    # deleted are then guaranteed to be the ones that were listed and
    # confirmed, and the deletion loop no longer iterates a live result while
    # the delete action works on the same session. This mirrors the other
    # maintenance commands in this module.
    files = list(model.Session.scalars(stmt))
    if not files:
        click.echo(f"Every file in storage {storage_name} has owner reference")
        return

    click.echo("Following files do not have owner reference")
    for file in files:
        size = utils.humanize_filesize(file.size)
        click.echo(f"\t{file.id}: {file.name} [{file.content_type}, {size}]")

    if remove and click.confirm("Do you want to delete these files?"):
        action = tk.get_action("files_file_delete")

        with click.progressbar(files) as bar:
            for file in bar:
                # CLI maintenance runs without a user, so skip auth checks.
                action({"ignore_auth": True}, {"id": file.id})


@group.command()
@storage_option
@click.option("--remove", is_flag=True, help="Remove files")
def invalid_owner(storage_name: str | None, remove: bool):
    """Manage files that have a suspicious owner reference."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # storage_name: name of a configured storage; the default storage is used
    #     when the option is omitted.
    # remove: after listing the files, ask for confirmation and delete them.
    #
    # Aborts with ``click.Abort`` when the storage is unknown, or when
    # ``--remove`` is requested for a storage that cannot remove files.
    storage_name = storage_name or shared.config.default_storage()
    try:
        storage = shared.get_storage(storage_name)
    except shared.exc.UnknownStorageError as err:
        tk.error_shout(err)
        raise click.Abort from err

    if remove and not storage.supports(shared.Capability.REMOVE):
        tk.error_shout(f"Storage {storage_name} does not support file removal")
        raise click.Abort

    # Inner join: only files that do have an owner record are considered.
    stmt = (
        sa.select(shared.File)
        .join(shared.File.owner_info)
        .where(shared.File.storage == storage_name)
    )

    # An owner record exists, but ``file.owner`` does not resolve to anything:
    # the reference is dangling.
    files = [f for f in model.Session.scalars(stmt) if f.owner is None]

    if not files:
        click.echo(
            f"Every owned file in storage {storage_name} has valid owner reference",
        )
        return

    click.echo("Following files have dangling owner reference")
    for file in files:
        size = utils.humanize_filesize(file.size)
        owner_info = file.owner_info
        click.echo(
            f"\t{file.id}: {file.name} [{file.content_type}, {size}]."
            f" Owner: {owner_info.owner_type} {owner_info.owner_id}",
        )

    if remove and click.confirm("Do you want to delete these files?"):
        action = tk.get_action("files_file_delete")

        with click.progressbar(files) as bar:
            for file in bar:
                # CLI maintenance runs without a user, so skip auth checks.
                action({"ignore_auth": True}, {"id": file.id})


@group.command()
@storage_option
@click.option("--remove", is_flag=True, help="Remove files")
def missing_files(storage_name: str | None, remove: bool):
    """Manage files that do not exist in storage."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # storage_name: name of a configured storage; the default storage is used
    #     when the option is omitted.
    # remove: after listing the files, ask for confirmation and delete them.
    #
    # Aborts with ``click.Abort`` when the storage is unknown, when it cannot
    # check file existence, or when ``--remove`` is requested for a storage
    # that cannot remove files.
    storage_name = storage_name or shared.config.default_storage()
    try:
        storage = shared.get_storage(storage_name)
    except shared.exc.UnknownStorageError as err:
        tk.error_shout(err)
        raise click.Abort from err

    if not storage.supports(shared.Capability.EXISTS):
        tk.error_shout(
            f"Storage {storage_name} does not support file availability checks",
        )
        raise click.Abort

    if remove and not storage.supports(shared.Capability.REMOVE):
        tk.error_shout(f"Storage {storage_name} does not support file removal")
        raise click.Abort

    stmt = sa.select(shared.File).where(shared.File.storage == storage_name)
    # Wrap the statement in an explicit subquery: passing a SELECT straight to
    # ``select_from()`` relies on implicit coercion that SQLAlchemy deprecated
    # in 1.4.
    total = model.Session.scalar(
        sa.select(sa.func.count()).select_from(stmt.subquery()),
    )

    # The count is only used to size the progress bar while every registered
    # file is checked against the storage.
    missing: list[shared.File] = []
    with click.progressbar(model.Session.scalars(stmt), length=total) as bar:
        for file in bar:
            data = shared.FileData.from_model(file)
            if not storage.exists(data):
                missing.append(file)

    if not missing:
        click.echo(
            f"No missing files located in storage {storage_name}",
        )
        return

    click.echo("Following files are not found in storage")
    for file in missing:
        size = utils.humanize_filesize(file.size)
        click.echo(
            f"\t{file.id}: {file.name} [{file.content_type}, {size}]",
        )

    if remove and click.confirm("Do you want to delete these files?"):
        action = tk.get_action("files_file_delete")

        with click.progressbar(missing) as bar:
            for file in bar:
                # CLI maintenance runs without a user, so skip auth checks.
                action({"ignore_auth": True}, {"id": file.id})
142 changes: 142 additions & 0 deletions ckanext/files/cli/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

from datetime import datetime, timezone

import click
import sqlalchemy as sa
from babel.dates import format_datetime, format_timedelta

import ckan.plugins.toolkit as tk
from ckan import model

from ckanext.files import shared, utils


def _now():
return datetime.now(timezone.utc)


@click.group()
def group():
    """Storage statistics."""
    # Intentionally empty: this function only acts as the container that the
    # statistics subcommands below attach to via ``@group.command()``.
    # The docstring is shown by click as the group's help text.


# Reusable ``-s/--storage-name`` option shared by every command in this module.
# When the option is omitted the commands receive ``None`` and fall back to
# ``shared.config.default_storage()``.
storage_option = click.option(
"-s",
"--storage-name",
help="Name of the configured storage",
)


@group.command()
@storage_option
def overview(storage_name: str | None):
    """General information about storage usage."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # Prints the number of files, the total size, and the creation time of the
    # newest and oldest file registered for the storage. Aborts with
    # ``click.Abort`` when no files are registered for it.
    storage_name = storage_name or shared.config.default_storage()

    aggregates = (
        sa.func.sum(shared.File.size),
        sa.func.count(shared.File.id),
        sa.func.max(shared.File.ctime),
        sa.func.min(shared.File.ctime),
    )
    query = sa.select(*aggregates).where(shared.File.storage == storage_name)
    record = model.Session.execute(query).fetchone()

    if record:
        size, count, newest, oldest = record
    else:
        size, count, newest, oldest = 0, 0, _now(), _now()

    if not count:
        tk.error_shout("Storage is not configured or empty")
        raise click.Abort

    styled_count = click.style(count, bold=True)
    click.secho(f"Number of files: {styled_count}")

    styled_size = click.style(utils.humanize_filesize(size), bold=True)
    click.secho(
        f"Used space: {styled_size}",
    )

    # Both timestamp lines share one layout: absolute time in bold followed by
    # the age relative to the current moment.
    timestamp_lines = (
        ("Newest file created at: {} ({})", newest),
        ("Oldest file created at: {} ({})", oldest),
    )
    for template, moment in timestamp_lines:
        absolute = click.style(format_datetime(moment), bold=True)
        relative = format_timedelta(moment - _now(), add_direction=True)
        click.secho(
            template.format(absolute, relative),
        )


@group.command()
@storage_option
def types(storage_name: str | None):
    """Files distribution by MIMEtype."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # storage_name: name of a configured storage; the default storage is used
    #     when the option is omitted.
    storage_name = storage_name or shared.config.default_storage()
    stmt = (
        sa.select(
            shared.File.content_type,
            sa.func.count(shared.File.content_type).label("count"),
        )
        .where(shared.File.storage == storage_name)
        .group_by(shared.File.content_type)
        .order_by(shared.File.content_type)
    )

    # Fetch the distribution once and derive the total from it. A separate
    # ``SUM`` query returns NULL for a storage without files (printed as
    # "None files") and needed the deprecated implicit-subquery ``Select.c``
    # accessor; summing the fetched rows yields 0 and keeps the total
    # consistent with the lines printed below.
    rows = list(model.Session.execute(stmt))
    total = sum(count for _, count in rows)

    click.secho(
        "Storage {} contains {} files".format(
            click.style(storage_name, bold=True),
            click.style(total, bold=True),
        ),
    )
    for content_type, count in rows:
        click.secho(f"\t{content_type}: {click.style(count, bold=True)}")


@group.command()
@storage_option
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help="Show distribution for every owner ID",
)
def owner(storage_name: str | None, verbose: bool):
    """Files distribution by owner."""
    # NOTE: click renders the docstring above as the command's help text, so
    # implementation notes are kept in comments.
    #
    # storage_name: name of a configured storage; the default storage is used
    #     when the option is omitted.
    # verbose: group by "<owner_type> <owner_id>" instead of owner type only.
    storage_name = storage_name or shared.config.default_storage()

    # ``concat`` is used in both branches so that the grouping key is always a
    # string expression built from the owner columns.
    owner_col = (
        sa.func.concat(shared.Owner.owner_type, " ", shared.Owner.owner_id)
        if verbose
        else sa.func.concat(shared.Owner.owner_type, "")
    )

    stmt = (
        sa.select(
            owner_col.label("owner"),
            sa.func.count(shared.File.id),
        )
        .where(shared.File.storage == storage_name)
        # Outer join keeps files that have no owner record at all.
        .outerjoin(
            shared.Owner,
            sa.and_(
                shared.Owner.item_id == shared.File.id,
                shared.Owner.item_type == "file",
            ),
        )
        .group_by(owner_col)
        .order_by(owner_col)
    )

    # Fetch the distribution once and derive the total from it. A separate
    # ``SUM`` query returns NULL for a storage without files (printed as
    # "None files") and depended on the deprecated implicit-subquery
    # ``Select.c`` accessor resolving an unlabeled aggregate by name.
    rows = list(model.Session.execute(stmt))
    total = sum(count for _, count in rows)

    click.secho(
        "Storage {} contains {} files".format(
            click.style(storage_name, bold=True),
            click.style(total, bold=True),
        ),
    )
    # ``owner_label`` rather than ``owner`` to avoid shadowing this command.
    for owner_label, count in rows:
        click.secho(
            "\t{}: {}".format(
                # A blank label is treated as "no owner".
                owner_label.strip()
                or click.style("has no owner", underline=True, bold=True),
                click.style(count, bold=True),
            ),
        )
2 changes: 1 addition & 1 deletion ckanext/files/shared.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from . import types
from . import config
from . import exceptions as exc
from . import types
from .base import (
FileData,
Manager,
Expand Down
3 changes: 1 addition & 2 deletions ckanext/files/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
class UploadStream(Protocol):
def read(self, size: Any = ..., /) -> bytes: ...

def __iter__(self) -> Iterator[bytes]:
...
def __iter__(self) -> Iterator[bytes]: ...


__all__ = [
Expand Down
1 change: 1 addition & 0 deletions ckanext/files/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import ckan.plugins.toolkit as tk
from ckan import model
from ckan.lib.api_token import _get_algorithm, _get_secret # type: ignore

from ckanext.files import types

log = logging.getLogger(__name__)
Expand Down

0 comments on commit 3831390

Please sign in to comment.