diff --git a/museum_map/__main__.py b/museum_map/__main__.py index 98dcca0..cefaa56 100644 --- a/museum_map/__main__.py +++ b/museum_map/__main__.py @@ -1,4 +1,4 @@ -from .cli import cli +from museum_map.cli import cli if __name__ == "__main__": cli() diff --git a/museum_map/cli/__init__.py b/museum_map/cli/__init__.py index 168d04c..b6d162a 100644 --- a/museum_map/cli/__init__.py +++ b/museum_map/cli/__init__.py @@ -1,275 +1,253 @@ import asyncio -import click import logging import os import sys -import yaml +import click +import yaml from cerberus import Validator -from typing import Union - -from .db import db -from .groups import groups, pipeline_impl as groups_pipeline -from .server import server -from .items import items, pipeline_impl as items_pipeline -from .layout import layout, pipeline_impl as layout_pipeline -from .search import search, pipeline_impl as search_pipeline +from museum_map.cli.db import db +from museum_map.cli.groups import groups +from museum_map.cli.groups import pipeline_impl as groups_pipeline +from museum_map.cli.items import items +from museum_map.cli.items import pipeline_impl as items_pipeline +from museum_map.cli.layout import layout +from museum_map.cli.layout import pipeline_impl as layout_pipeline +from museum_map.cli.search import pipeline_impl as search_pipeline +from museum_map.cli.search import search +from museum_map.cli.server import server -logger = logging.getLogger('scr') +logger = logging.getLogger("scr") CONFIG_SCHEMA = { - 'server': { - 'type': 'dict', - 'schema': { - 'host': { - 'type': 'string', - 'default': '127.0.0.1' - }, - 'port': { - 'type': 'integer', - 'default': 6543 - }, + "server": { + "type": "dict", + "schema": { + "host": {"type": "string", "default": "127.0.0.1"}, + "port": {"type": "integer", "default": 6543}, + }, + "default": { + "host": "127.0.0.1", + "port": 6543, }, - 'default': { - 'host': '127.0.0.1', - 'port': 6543, - } }, - 'db': { - 'type': 'dict', - 'required': True, - 'schema': { - 'dsn': 
{ - 'type': 'string', - 'required': True, - 'empty': False, + "db": { + "type": "dict", + "required": True, + "schema": { + "dsn": { + "type": "string", + "required": True, + "empty": False, }, - } + }, }, - 'search': { - 'type': 'dict', - 'required': True, - 'schema': { - 'url': { - 'type': 'string', - 'required': True, - 'empty': False, + "search": { + "type": "dict", + "required": True, + "schema": { + "url": { + "type": "string", + "required": True, + "empty": False, }, - 'key': { - 'type': 'string', - 'required': True, - 'empty': False, + "key": { + "type": "string", + "required": True, + "empty": False, }, - } + }, }, - 'data': { - 'type': 'dict', - 'required': True, - 'schema': { - 'topic_fields': { - 'type': 'list', - 'required': True, - 'minlength': 1, - 'schema': { - 'type': 'string', - 'empty': False, - } + "data": { + "type": "dict", + "required": True, + "schema": { + "topic_fields": { + "type": "list", + "required": True, + "minlength": 1, + "schema": { + "type": "string", + "empty": False, + }, }, - 'hierarchy': { - 'type': 'dict', - 'required': True, - 'schema': { - 'field': { - 'type': 'string', - 'required': True, - 'empty': False, + "hierarchy": { + "type": "dict", + "required": True, + "schema": { + "field": { + "type": "string", + "required": True, + "empty": False, }, - 'expansions': { - 'type': 'list', - 'required': False, - 'default': [], - 'schema': { - 'type': 'string', - 'allowed': ['nlp', 'aat'], - } - } - } + "expansions": { + "type": "list", + "required": False, + "default": [], + "schema": { + "type": "string", + "allowed": ["nlp", "aat"], + }, + }, + }, }, - 'year_field': { - 'type': 'string', - 'required': True, - 'empty': False, - } - } + "year_field": { + "type": "string", + "required": True, + "empty": False, + }, + }, }, - 'images': { - 'type': 'dict', - 'required': True, - 'schema': { - 'basepath': { - 'type': 'string', - 'required': True, - 'empty': False, + "images": { + "type": "dict", + "required": True, + "schema": { + 
"basepath": { + "type": "string", + "required": True, + "empty": False, } - } + }, }, - 'layout': { - 'type': 'dict', - 'required': True, - 'schema': { - 'rooms': { - 'type': 'list', - 'required': True, - 'minlength': 1, - 'schema': { - 'type': 'dict', - 'schema': { - 'id': { - 'type': 'string', - 'required': True, - 'empty': False, + "layout": { + "type": "dict", + "required": True, + "schema": { + "rooms": { + "type": "list", + "required": True, + "minlength": 1, + "schema": { + "type": "dict", + "schema": { + "id": { + "type": "string", + "required": True, + "empty": False, }, - 'direction': { - 'type': 'string', - 'required': True, - 'allowed': ['vert', 'horiz'], + "direction": { + "type": "string", + "required": True, + "allowed": ["vert", "horiz"], }, - 'items': { - 'type': 'integer', - 'required': True, - 'min': 1, + "items": { + "type": "integer", + "required": True, + "min": 1, }, - 'splits': { - 'type': 'integer', - 'required': True, - 'min': 1, + "splits": { + "type": "integer", + "required": True, + "min": 1, }, - 'position': { - 'type': 'dict', - 'required': True, - 'schema': { - 'x': { - 'type': 'integer', - 'required': True - }, - 'y': { - 'type': 'integer', - 'required': True - }, - 'width': { - 'type': 'integer', - 'required': True - }, - 'height': { - 'type': 'integer', - 'required': True - }, - } - } - } - } + "position": { + "type": "dict", + "required": True, + "schema": { + "x": {"type": "integer", "required": True}, + "y": {"type": "integer", "required": True}, + "width": {"type": "integer", "required": True}, + "height": {"type": "integer", "required": True}, + }, + }, + }, + }, } - } + }, }, - 'app': { - 'type': 'dict', - 'required': True, - 'schema': { - 'base_url': { - 'type': 'string', - 'required': True, - 'empty': False, - }, - 'intro': { - 'type': 'string', - 'required': True, - 'empty': False + "app": { + "type": "dict", + "required": True, + "schema": { + "base_url": { + "type": "string", + "required": True, + "empty": False, }, - 
'footer': { - 'type': 'dict', - 'schema': { - 'center': { - 'type': 'dict', - 'schema': { - 'label': { - 'type': 'string', - 'required': True, - 'empty': False, + "intro": {"type": "string", "required": True, "empty": False}, + "footer": { + "type": "dict", + "schema": { + "center": { + "type": "dict", + "schema": { + "label": { + "type": "string", + "required": True, + "empty": False, }, - 'url': { - 'type': 'string', - 'required': False, - } - } + "url": { + "type": "string", + "required": False, + }, + }, }, - 'right': { - 'type': 'dict', - 'schema': { - 'label': { - 'type': 'string', - 'required': True, - 'empty': False, + "right": { + "type": "dict", + "schema": { + "label": { + "type": "string", + "required": True, + "empty": False, + }, + "url": { + "type": "string", + "required": False, }, - 'url': { - 'type': 'string', - 'required': False, - } - } - } - } + }, + }, + }, }, - 'item': { - 'type': 'dict', - 'required': True, - 'schema': { - 'texts': { - 'type': 'list', - 'schema': { - 'type': 'dict', - 'schema': { - 'name': { - 'type': 'string', - 'required': True, - 'empty': False, + "item": { + "type": "dict", + "required": True, + "schema": { + "texts": { + "type": "list", + "schema": { + "type": "dict", + "schema": { + "name": { + "type": "string", + "required": True, + "empty": False, }, - 'label': { - 'type': 'string', - 'required': True, - 'empty': False, - } - } - } + "label": { + "type": "string", + "required": True, + "empty": False, + }, + }, + }, }, - 'fields': { - 'type': 'list', - 'schema': { - 'type': 'dict', - 'schema': { - 'name': { - 'type': 'string', - 'required': True, - 'empty': False, + "fields": { + "type": "list", + "schema": { + "type": "dict", + "schema": { + "name": { + "type": "string", + "required": True, + "empty": False, }, - 'label': { - 'type': 'string', - 'required': True, - 'empty': False, - } - } - } - } - } - } - } + "label": { + "type": "string", + "required": True, + "empty": False, + }, + }, + }, + }, + }, + }, + }, }, 
- 'debug': { - 'type': 'boolean', - 'default': False, + "debug": { + "type": "boolean", + "default": False, }, - 'logging': { - 'type': 'dict' - } + "logging": {"type": "dict"}, } @@ -287,10 +265,10 @@ def validate_config(config: dict) -> dict: else: error_list = [] - def walk_error_tree(err: Union[dict, list], path: str) -> None: + def walk_error_tree(err: dict | list, path: str) -> None: if isinstance(err, dict): for key, value in err.items(): - walk_error_tree(value, path + (str(key), )) + walk_error_tree(value, (*path, str(key))) elif isinstance(err, list): for sub_err in err: walk_error_tree(sub_err, path) @@ -298,13 +276,14 @@ def walk_error_tree(err: Union[dict, list], path: str) -> None: error_list.append(f'{".".join(path)}: {err}') walk_error_tree(validator.errors, ()) - error_str = '\n'.join(error_list) - raise click.ClickException(f'Configuration errors:\n\n{error_str}') + error_str = "\n".join(error_list) + msg = f"Configuration errors:\n\n{error_str}" + raise click.ClickException(msg) @click.group() -@click.option('-v', '--verbose', count=True) -@click.option('-c', '--config', default='production.yml') +@click.option("-v", "--verbose", count=True) +@click.option("-c", "--config", default="production.yml") @click.pass_context def cli(ctx, verbose, config): """Museum Map CLI""" @@ -313,13 +292,13 @@ def cli(ctx, verbose, config): logging.basicConfig(level=logging.INFO) elif verbose > 1: logging.basicConfig(level=logging.DEBUG) - logger.debug('Logging set up') + logger.debug("Logging set up") if not os.path.exists(config): - logger.error(f'Configuration file {config} not found') + logger.error(f"Configuration file {config} not found") sys.exit(1) with open(config) as in_f: - config = yaml.load(in_f, Loader=yaml.FullLoader) - ctx.obj['config'] = validate_config(config) + config = yaml.safe_load(in_f) + ctx.obj["config"] = validate_config(config) async def pipeline_impl(config): @@ -334,7 +313,7 @@ async def pipeline_impl(config): @click.pass_context def 
pipeline(ctx): """Run the full processing pipline.""" - asyncio.run(pipeline_impl(ctx.obj['config'])) + asyncio.run(pipeline_impl(ctx.obj["config"])) cli.add_command(pipeline) diff --git a/museum_map/cli/db.py b/museum_map/cli/db.py index bdf132f..e65d669 100644 --- a/museum_map/cli/db.py +++ b/museum_map/cli/db.py @@ -1,13 +1,13 @@ import asyncio -import click import json import os import shutil import subprocess -from museum_map.cli.util import ClickIndeterminate +import click -from ..models import create_engine, create_sessionmaker, Base, Item +from museum_map.cli.util import ClickIndeterminate +from museum_map.models import Base, Item, create_engine, create_sessionmaker async def init_impl(config, drop_existing): @@ -20,21 +20,21 @@ async def init_impl(config, drop_existing): @click.command() -@click.option('--drop-existing', is_flag=True, help='Drop any existing tables.') +@click.option("--drop-existing", is_flag=True, help="Drop any existing tables.") @click.pass_context def init(ctx, drop_existing): """Initialise the database.""" - asyncio.run(init_impl(ctx.obj['config'], drop_existing)) + asyncio.run(init_impl(ctx.obj["config"], drop_existing)) async def load_impl(config, source): """Load the metadata.""" - progress = ClickIndeterminate('Loading items') + progress = ClickIndeterminate("Loading items") progress.start() async with create_sessionmaker(config)() as dbsession: for basepath, _, filenames in os.walk(source): for filename in filenames: - if filename.endswith('.json'): + if filename.endswith(".json"): with open(os.path.join(basepath, filename)) as in_f: dbsession.add(Item(attributes=json.load(in_f))) await dbsession.commit() @@ -42,31 +42,51 @@ async def load_impl(config, source): @click.command() -@click.argument('source') +@click.argument("source") @click.pass_context def load(ctx, source): """Load the metadata.""" - asyncio.run(load_impl(ctx.obj['config'], source)) + asyncio.run(load_impl(ctx.obj["config"], source)) @click.command() 
-@click.argument('source') -@click.argument('target') +@click.argument("source") +@click.argument("target") @click.pass_context -def load_images(ctx, source, target): +def load_images(ctx, source, target): # noqa: ARG001 """Load and convert images.""" - progress = ClickIndeterminate('Loading images') + progress = ClickIndeterminate("Loading images") progress.start() for basepath, _, filenames in os.walk(source): for filename in filenames: - if filename.endswith('.jpg'): - image_id = filename[:filename.find('.')] + if filename.endswith(".jpg"): + image_id = filename[: filename.find(".")] os.makedirs(os.path.join(target, *image_id), exist_ok=True) image_source = os.path.join(basepath, filename) image_target = os.path.join(target, *image_id, filename) shutil.copy(image_source, image_target) - subprocess.run(['gm', 'convert', image_source, '-resize', '240x240', image_target.replace('.jpg', '-240.jpg')]) - subprocess.run(['gm', 'convert', image_source, '-resize', '320x320', image_target.replace('.jpg', '-320.jpg')]) + subprocess.run( + [ # noqa: S603 S607 + "gm", + "convert", + image_source, + "-resize", + "240x240", + image_target.replace(".jpg", "-240.jpg"), + ], + check=True, + ) + subprocess.run( + [ # noqa: S603 S607 + "gm", + "convert", + image_source, + "-resize", + "320x320", + image_target.replace(".jpg", "-320.jpg"), + ], + check=True, + ) progress.stop() diff --git a/museum_map/cli/groups.py b/museum_map/cli/groups.py index 42f381d..1525461 100644 --- a/museum_map/cli/groups.py +++ b/museum_map/cli/groups.py @@ -1,56 +1,60 @@ import asyncio -import click -import inflection import math - from collections import Counter + +import click +import inflection from numpy import array from scipy.spatial.distance import cosine -from sqlalchemy import and_, or_, func +from sqlalchemy import and_, func, or_ from sqlalchemy.future import select from sqlalchemy.orm import selectinload -from .items import apply_aat, apply_nlp -from .util import ClickIndeterminate -from 
..models import Group, Item, create_sessionmaker +from museum_map.cli.items import apply_aat, apply_nlp +from museum_map.cli.util import ClickIndeterminate +from museum_map.models import Group, Item, create_sessionmaker async def generate_groups_impl(config): """Generate the basic groups.""" async with create_sessionmaker(config)() as dbsession: - item_stmt = select(Item).filter(Item.group_id == None) - count_stmt = select(func.count(Item.id)).filter(Item.group_id == None) + item_stmt = select(Item).filter(Item.group_id is None) + count_stmt = select(func.count(Item.id)).filter(Item.group_id is None) count = await dbsession.execute(count_stmt) result = await dbsession.execute(item_stmt) categories = [] - with click.progressbar(result.scalars(), length=count.scalar_one(), label='Generating potential groups') as progress: + with click.progressbar( + result.scalars(), length=count.scalar_one(), label="Generating potential groups" + ) as progress: for item in progress: - for category in item.attributes['_categories']: + for category in item.attributes["_categories"]: categories.append(category.lower()) - counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15] + counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15] # noqa: PLR2004 counts.sort(key=lambda c: c[1]) max_groups = len(counts) - with click.progressbar(length=max_groups, label='Generating groups') as progress: + with click.progressbar(length=max_groups, label="Generating groups") as progress: while counts: category = counts[0][0] group_stmt = select(Group).filter(Group.value == category) result = await dbsession.execute(group_stmt) group = result.scalars().first() if group is None: - group = Group(value=category, label=category[0].upper() + category[1:], split='basic') + group = Group(value=category, label=category[0].upper() + category[1:], split="basic") dbsession.add(group) result = await dbsession.execute(item_stmt) for item in 
result.scalars(): - if category in item.attributes['_categories']: + if category in item.attributes["_categories"]: item.group = group await dbsession.commit() categories = [] result = await dbsession.execute(item_stmt) for item in result.scalars(): - for category in item.attributes['_categories']: + for category in item.attributes["_categories"]: categories.append(category.lower()) old_counts = len(counts) - counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15] + counts = [ + (cat, count) for cat, count in Counter(categories).most_common() if count >= 15 # noqa: PLR2004 + ] counts.sort(key=lambda c: c[1]) progress.update(old_counts - len(counts)) await dbsession.commit() @@ -60,13 +64,13 @@ async def generate_groups_impl(config): @click.pass_context def generate_groups(ctx): """Generate the basic groups.""" - asyncio.run(generate_groups_impl(ctx.obj['config'])) + asyncio.run(generate_groups_impl(ctx.obj["config"])) def fill_vector(group): """Create a full vector from a sparse vector in the database.""" vec = array([0 for _ in range(0, 300)], dtype=float) - for dim, value in group.attributes['lda_vector']: + for dim, value in group.attributes["lda_vector"]: vec[dim] = value return vec @@ -91,12 +95,12 @@ def split_by_similarity(dbsession, group): if next_item: sorted_items.append(next_item) limit = len(group.items) / math.ceil(len(group.items) / 100) - new_group = Group(value=group.value, label=group.label, parent=group, split='similar') + new_group = Group(value=group.value, label=group.label, parent=group, split="similar") dbsession.add(new_group) count = 0 for item in sorted_items: if count > limit: - new_group = Group(value=group.value, label=group.label, parent=group, split='similar') + new_group = Group(value=group.value, label=group.label, parent=group, split="similar") dbsession.add(new_group) count = 0 item.group = new_group @@ -109,7 +113,9 @@ def split_by_attribute(dbsession, group, attr): for item in group.items: if 
attr in item.attributes and item.attributes[attr]: values.extend(item.attributes[attr]) - categories = [(v, c) for v, c in Counter(values).most_common() if c < len(group.items) * 0.6666 and c >= 15] + categories = [ + (v, c) for v, c in Counter(values).most_common() if c < len(group.items) * 0.6666 and c >= 15 # noqa: PLR2004 + ] if categories: category_values = [v for v, _ in categories] has_values = 0 @@ -121,15 +127,17 @@ def split_by_attribute(dbsession, group, attr): break if found: has_values = has_values + 1 - if has_values / len(group.items) > 0.9: + if has_values / len(group.items) > 0.9: # noqa: PLR2004 categories.reverse() for category in categories: - new_group = Group(value=category[0], label=f'{group.label} - {category[0]}', parent=group, split='attribute') + new_group = Group( + value=category[0], label=f"{group.label} - {category[0]}", parent=group, split="attribute" + ) dbsession.add(new_group) for item in list(group.items): if category[0] in item.attributes[attr]: item.group = new_group - new_group = Group(value=group.label, label=group.label, parent=group, split='attribute') + new_group = Group(value=group.label, label=group.label, parent=group, split="attribute") dbsession.add(new_group) for item in list(group.items): item.group = new_group @@ -144,30 +152,35 @@ def split_by_year(config, dbsession, group): centuries = [] with_year = 0 for item in group.items: - if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]: - years.append(item.attributes[config['data']['year_field']]) + if config["data"]["year_field"] in item.attributes and item.attributes[config["data"]["year_field"]]: + years.append(item.attributes[config["data"]["year_field"]]) with_year = with_year + 1 - if with_year / len(group.items) > 0.95: + if with_year / len(group.items) > 0.95: # noqa: PLR2004 common = [(int(v), c) for v, c in Counter(years).most_common()] start_year = min([c for c, _ in common]) end_year = max([c for c, _ in 
common]) - if (start_year != end_year): - year_boundaries = [] - if (end_year - start_year) <= 100 and (end_year - start_year) > 10: + if start_year != end_year: + if (end_year - start_year) <= 100 and (end_year - start_year) > 10: # noqa: PLR2004 start_decade = math.floor(start_year / 10) end_decade = math.floor(end_year / 10) decades = [] for start_year in range(start_decade * 10, (end_decade + 1) * 10, 10): for item in list(group.items): - if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]: - if start_year <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < start_year + 10: + if ( + config["data"]["year_field"] in item.attributes + and item.attributes[config["data"]["year_field"]] + ): + if ( + start_year <= int(item.attributes[config["data"]["year_field"]]) + and int(item.attributes[config["data"]["year_field"]]) < start_year + 10 + ): if len(decades) == 0 or decades[-1][0][0] != start_year: decades.append([[start_year], 1]) else: decades[-1][1] = decades[-1][1] + 1 idx = 0 while idx < len(decades) - 1: - if decades[idx][1] + decades[idx + 1][1] < 100: + if decades[idx][1] + decades[idx + 1][1] < 100: # noqa: PLR2004 decades[idx][0].extend(decades[idx + 1][0]) decades[idx][1] = decades[idx][1] + decades[idx + 1][1] decades.pop(idx + 1) @@ -176,37 +189,54 @@ def split_by_year(config, dbsession, group): for years, _ in decades: new_group = None for item in list(group.items): - if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]: - if years[0] <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < years[-1] + 10: + if ( + config["data"]["year_field"] in item.attributes + and item.attributes[config["data"]["year_field"]] + ): + if ( + years[0] <= int(item.attributes[config["data"]["year_field"]]) + and int(item.attributes[config["data"]["year_field"]]) < 
years[-1] + 10 + ): if new_group is None: if len(years) == 1: - label = f'{years[0]}s' + label = f"{years[0]}s" else: - label = f'{years[0]}s-{years[-1]}s' - new_group = Group(value=str(start_year), label=f'{group.label} - {label}', parent=group, split='time') + label = f"{years[0]}s-{years[-1]}s" + new_group = Group( + value=str(start_year), + label=f"{group.label} - {label}", + parent=group, + split="time", + ) dbsession.add(new_group) item.group = new_group if group.items: - new_group = Group(value=group.label, label=group.label, parent=group, split='time') + new_group = Group(value=group.label, label=group.label, parent=group, split="time") dbsession.add(new_group) for item in list(group.items): item.group = new_group return True - elif (end_year - start_year) > 100: + elif (end_year - start_year) > 100: # noqa: PLR2004 start_century = math.floor(start_year / 100) end_century = math.floor(end_year / 100) centuries = [] for start_year in range(start_century * 100, (end_century + 1) * 100, 100): for item in list(group.items): - if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]: - if start_year <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < start_year + 100: + if ( + config["data"]["year_field"] in item.attributes + and item.attributes[config["data"]["year_field"]] + ): + if ( + start_year <= int(item.attributes[config["data"]["year_field"]]) + and int(item.attributes[config["data"]["year_field"]]) < start_year + 100 + ): if len(centuries) == 0 or centuries[-1][0][0] != start_year: centuries.append([[start_year], 1]) else: centuries[-1][1] = centuries[-1][1] + 1 idx = 0 while idx < len(centuries) - 1: - if centuries[idx][1] + centuries[idx + 1][1] < 100: + if centuries[idx][1] + centuries[idx + 1][1] < 100: # noqa: PLR2004 centuries[idx][0].extend(centuries[idx + 1][0]) centuries[idx][1] = centuries[idx][1] + centuries[idx + 1][1] centuries.pop(idx + 
1) @@ -215,44 +245,55 @@ def split_by_year(config, dbsession, group): for years, _ in centuries: new_group = None for item in list(group.items): - if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]: - if years[0] <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < years[-1] + 100: + if ( + config["data"]["year_field"] in item.attributes + and item.attributes[config["data"]["year_field"]] + ): + if ( + years[0] <= int(item.attributes[config["data"]["year_field"]]) + and int(item.attributes[config["data"]["year_field"]]) < years[-1] + 100 + ): if new_group is None: if len(years) == 1: century = math.floor(years[0] / 100) + 1 - if century % 10 == 1 and century != 11: - label = f'{century}st' - elif century % 10 == 2 and century != 12: - label = f'{century}nd' - elif century % 10 == 3 and century != 13: - label = f'{century}rd' + if century % 10 == 1 and century != 11: # noqa: PLR2004 + label = f"{century}st" + elif century % 10 == 2 and century != 12: # noqa: PLR2004 + label = f"{century}nd" + elif century % 10 == 3 and century != 13: # noqa: PLR2004 + label = f"{century}rd" else: - label = f'{century}th' + label = f"{century}th" else: century = math.floor(years[0] / 100) + 1 - if century % 10 == 1 and century != 11: - start_label = f'{century}st' - elif century % 10 == 2 and century != 12: - start_label = f'{century}nd' - elif century % 10 == 3 and century != 13: - start_label = f'{century}rd' + if century % 10 == 1 and century != 11: # noqa: PLR2004 + start_label = f"{century}st" + elif century % 10 == 2 and century != 12: # noqa: PLR2004 + start_label = f"{century}nd" + elif century % 10 == 3 and century != 13: # noqa: PLR2004 + start_label = f"{century}rd" else: - start_label = f'{century}th' + start_label = f"{century}th" century = math.floor(years[-1] / 100) + 1 - if century % 10 == 1 and century != 11: - end_label = f'{century}st' - elif century % 10 == 
2 and century != 12: - end_label = f'{century}nd' - elif century % 10 == 3 and century != 13: - end_label = f'{century}rd' + if century % 10 == 1 and century != 11: # noqa: PLR2004 + end_label = f"{century}st" + elif century % 10 == 2 and century != 12: # noqa: PLR2004 + end_label = f"{century}nd" + elif century % 10 == 3 and century != 13: # noqa: PLR2004 + end_label = f"{century}rd" else: - end_label = f'{century}th' - label = f'{start_label}-{end_label}' - new_group = Group(value=str(start_year), label=f'{group.label} - {label} century', parent=group, split='time') + end_label = f"{century}th" + label = f"{start_label}-{end_label}" + new_group = Group( + value=str(start_year), + label=f"{group.label} - {label} century", + parent=group, + split="time", + ) dbsession.add(new_group) item.group = new_group if group.items: - new_group = Group(value=group.label, label=group.label, parent=group, split='time') + new_group = Group(value=group.label, label=group.label, parent=group, split="time") dbsession.add(new_group) for item in list(group.items): item.group = new_group @@ -263,7 +304,7 @@ def split_by_year(config, dbsession, group): async def split_large_groups_impl(config): """Split large groups into smaller ones.""" async with create_sessionmaker(config)() as dbsession: - progress = ClickIndeterminate('Splitting large groups') + progress = ClickIndeterminate("Splitting large groups") progress.start() splitting = True stmt = select(Group).options(selectinload(Group.items), selectinload(Group.children)) @@ -272,20 +313,20 @@ async def split_large_groups_impl(config): result = await dbsession.execute(stmt) for group in result.scalars(): if len(group.children) == 0: - if len(group.items) > 120 and len(group.items) < 300: + if len(group.items) > 120 and len(group.items) < 300: # noqa: PLR2004 if split_by_year(config, dbsession, group): splitting = True else: split_by_similarity(dbsession, group) splitting = True - elif len(group.items) >= 300: - if 
split_by_attribute(dbsession, group, 'concepts'): + elif len(group.items) >= 300: # noqa: PLR2004 + if split_by_attribute(dbsession, group, "concepts"): splitting = True - elif split_by_attribute(dbsession, group, 'subjects'): + elif split_by_attribute(dbsession, group, "subjects"): splitting = True - elif split_by_attribute(dbsession, group, 'materials'): + elif split_by_attribute(dbsession, group, "materials"): splitting = True - elif split_by_attribute(dbsession, group, 'techniques'): + elif split_by_attribute(dbsession, group, "techniques"): splitting = True elif split_by_year(config, dbsession, group): splitting = True @@ -300,13 +341,13 @@ async def split_large_groups_impl(config): @click.pass_context def split_large_groups(ctx): """Split large groups into smaller ones.""" - asyncio.run(split_large_groups_impl(ctx.obj['config'])) + asyncio.run(split_large_groups_impl(ctx.obj["config"])) async def merge_singular_plural_impl(config): """Merge singular and plural groups.""" async with create_sessionmaker(config)() as dbsession: - progress = ClickIndeterminate('Merging singular and plural') + progress = ClickIndeterminate("Merging singular and plural") progress.start() modifying = True while modifying: @@ -314,8 +355,11 @@ async def merge_singular_plural_impl(config): stmt = select(Group) result = await dbsession.execute(stmt) for group in result.scalars(): - stmt = select(Group).filter(and_(Group.value == inflection.singularize(group.value), - Group.id != group.id)).options(selectinload(Group.items)) + stmt = ( + select(Group) + .filter(and_(Group.value == inflection.singularize(group.value), Group.id != group.id)) + .options(selectinload(Group.items)) + ) result = await dbsession.execute(stmt) other = result.scalars().first() if other: @@ -333,19 +377,21 @@ async def merge_singular_plural_impl(config): @click.pass_context def merge_singular_plural(ctx): """Merge singular and plural groups.""" - asyncio.run(merge_singular_plural_impl(ctx.obj['config'])) + 
asyncio.run(merge_singular_plural_impl(ctx.obj["config"])) async def add_parent_groups_impl(config): """Add any required parent groups.""" async with create_sessionmaker(config)() as dbsession: - stmt = select(Group).filter(Group.parent_id == None).options(selectinload(Group.parent)) + stmt = select(Group).filter(Group.parent_id is None).options(selectinload(Group.parent)) result = await dbsession.execute(stmt) - stmt = select(func.count(Group.id)).filter(Group.parent_id == None) + stmt = select(func.count(Group.id)).filter(Group.parent_id is None) result_count = await dbsession.execute(stmt) - with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Adding parent groups') as progress: + with click.progressbar( + result.scalars(), length=result_count.scalar_one(), label="Adding parent groups" + ) as progress: for group in progress: - if 'aat' in config['data']['hierarchy']['expansions']: + if "aat" in config["data"]["hierarchy"]["expansions"]: categories = apply_aat(group.value, merge=False) if categories: for category_list in categories: @@ -355,11 +401,13 @@ async def add_parent_groups_impl(config): result = await dbsession.execute(stmt) parent_group = result.scalars().first() if not parent_group: - parent_group = Group(value=category, label=category[0].upper() + category[1:], split='parent') + parent_group = Group( + value=category, label=category[0].upper() + category[1:], split="parent" + ) dbsession.add(group) group.parent = parent_group mapped = True - group = parent_group + group = parent_group # noqa: PLW2901 if group.parent_id: break if mapped: @@ -367,7 +415,9 @@ async def add_parent_groups_impl(config): else: mapped = False for category in apply_nlp(group.value): - stmt = select(Group).filter(or_(Group.value == category, Group.value == inflection.pluralize(category))) + stmt = select(Group).filter( + or_(Group.value == category, Group.value == inflection.pluralize(category)) + ) result = await dbsession.execute(stmt) 
parent_group = result.scalars().first() if parent_group: @@ -376,13 +426,17 @@ async def add_parent_groups_impl(config): mapped = True break if not mapped: - if group.value not in ['styles and periods']: + if group.value not in ["styles and periods"]: for category in apply_nlp(group.value): hierarchies = apply_aat(category, merge=False) groups = [] for hierarchy in hierarchies: if group.value not in hierarchy: - stmt = select(Group).filter(Group.value.in_(hierarchy)).options(selectinload(Group.items)) + stmt = ( + select(Group) + .filter(Group.value.in_(hierarchy)) + .options(selectinload(Group.items)) + ) result = await dbsession.execute(stmt) for potential_group in result.scalars(): depth = 0 @@ -402,13 +456,13 @@ async def add_parent_groups_impl(config): @click.pass_context def add_parent_groups(ctx): """Add any required parent groups.""" - asyncio.run(add_parent_groups_impl(ctx.obj['config'])) + asyncio.run(add_parent_groups_impl(ctx.obj["config"])) async def prune_single_groups_impl(config): """Remove groups that have a single child and no items.""" async with create_sessionmaker(config)() as dbsession: - progress = ClickIndeterminate('Pruning single groups') + progress = ClickIndeterminate("Pruning single groups") progress.start() pruning = True stmt = select(Group).options(selectinload(Group.children), selectinload(Group.items)) @@ -429,13 +483,13 @@ async def prune_single_groups_impl(config): @click.pass_context def prune_single_groups(ctx): """Remove groups that have a single child and no items.""" - asyncio.run(prune_single_groups_impl(ctx.obj['config'])) + asyncio.run(prune_single_groups_impl(ctx.obj["config"])) async def move_inner_items_impl(config): """Move items from non-leaf groups into extra leaf groups.""" async with create_sessionmaker(config)() as dbsession: - progress = ClickIndeterminate('Moving inner items') + progress = ClickIndeterminate("Moving inner items") progress.start() moving = True stmt = 
select(Group).options(selectinload(Group.children), selectinload(Group.items)) @@ -444,7 +498,7 @@ async def move_inner_items_impl(config): result = await dbsession.execute(stmt) for group in result.scalars(): if len(group.items) > 0 and len(group.children) > 0: - sub_group = Group(value=group.value, label=group.label, split='inner') + sub_group = Group(value=group.value, label=group.label, split="inner") dbsession.add(sub_group) sub_group.parent = group for item in list(group.items): @@ -461,7 +515,7 @@ async def move_inner_items_impl(config): @click.pass_context def move_inner_items(ctx): """Move items from non-leaf groups into extra leaf groups.""" - asyncio.run(move_inner_items_impl(ctx.obj['config'])) + asyncio.run(move_inner_items_impl(ctx.obj["config"])) async def pipeline_impl(config): @@ -478,7 +532,7 @@ async def pipeline_impl(config): @click.pass_context def pipeline(ctx): """Run the group processing pipeline.""" - asyncio.run(pipeline_impl(ctx.obj['config'])) + asyncio.run(pipeline_impl(ctx.obj["config"])) @click.group() diff --git a/museum_map/cli/items.py b/museum_map/cli/items.py index b4df5ae..19f7e60 100644 --- a/museum_map/cli/items.py +++ b/museum_map/cli/items.py @@ -1,17 +1,17 @@ import asyncio -import click import json import os + +import click import requests import spacy - from gensim import corpora, models from lxml import etree from sqlalchemy import func from sqlalchemy.future import select -from ..models import create_sessionmaker, Item -from .util import ClickIndeterminate +from museum_map.cli.util import ClickIndeterminate +from museum_map.models import Item, create_sessionmaker async def tokenise_impl(config): @@ -20,16 +20,16 @@ async def tokenise_impl(config): async with create_sessionmaker(config)() as dbsession: count = await dbsession.execute(select(func.count(Item.id))) result = await dbsession.execute(select(Item)) - with click.progressbar(result.scalars(), length=count.scalar_one(), label='Tokenising items') as progress: + 
with click.progressbar(result.scalars(), length=count.scalar_one(), label="Tokenising items") as progress: for item in progress: - text = '' - for field in config['data']['topic_fields']: + text = "" + for field in config["data"]["topic_fields"]: if field in item.attributes and item.attributes[field].strip(): - if item.attributes[field].strip().endswith('.'): - text = f'{text} {item.attributes[field].strip()}' + if item.attributes[field].strip().endswith("."): + text = f"{text} {item.attributes[field].strip()}" else: - text = f'{text} {item.attributes[field].strip()}.' - item.attributes['_tokens'] = [t.lemma_ for t in nlp(text) if not t.pos_ in ['PUNCT', 'SPACE']] + text = f"{text} {item.attributes[field].strip()}." + item.attributes["_tokens"] = [t.lemma_ for t in nlp(text) if t.pos_ not in ["PUNCT", "SPACE"]] await dbsession.commit() @@ -37,65 +37,65 @@ async def tokenise_impl(config): @click.pass_context def tokenise(ctx): """Generate token lists for each item.""" - asyncio.run(tokenise_impl(ctx.obj['config'])) + asyncio.run(tokenise_impl(ctx.obj["config"])) def strip_article(text): """Strip any indefinite article from the beginning of the text.""" text = text.strip() - if text.startswith('a '): - return text[2:].strip().strip('()[]') - elif text.startswith('an '): - return text[3:].strip().strip('()[]') + if text.startswith("a "): + return text[2:].strip().strip("()[]") + elif text.startswith("an "): + return text[3:].strip().strip("()[]") else: - return text.strip('()[]') + return text.strip("()[]") def apply_nlp(category): """Recursively apply the NLP processing rules.""" - if ' ' in category: - if ' for ' in category: - idx = category.find(' for ') + if " " in category: + if " for " in category: + idx = category.find(" for ") prefix = strip_article(category[:idx]) - suffix = strip_article(category[idx + 5:]) - return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix) - elif '(' in category: - start = category.find('(') - end = category.find(')') - 
outer = strip_article((category[:start] + ' ' + category[end + 1:])) inner = strip_article(category[start + 1:end]) return [outer, inner] + apply_nlp(outer) + apply_nlp(inner) - elif ' with ' in category: - idx = category.find(' with ') + suffix = strip_article(category[idx + 5 :]) + return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)] + elif "(" in category: + start = category.find("(") + end = category.find(")") + outer = strip_article(category[:start] + " " + category[end + 1 :]) + inner = strip_article(category[start + 1 : end]) + return [outer, inner, *apply_nlp(outer), *apply_nlp(inner)] + elif " with " in category: + idx = category.find(" with ") prefix = strip_article(category[:idx]) - suffix = strip_article(category[idx + 6:]) - return [prefix, suffix] + apply_nlp(prefix) + apply_nlp(suffix) - elif ' of ' in category: - idx = category.find(' of ') + suffix = strip_article(category[idx + 6 :]) + return [prefix, suffix, *apply_nlp(prefix), *apply_nlp(suffix)] + elif " of " in category: + idx = category.find(" of ") prefix = strip_article(category[:idx]) - suffix = strip_article(category[idx + 4:]) - if prefix in ['pair', 'copy', 'base', 'fragments', 'figure', 'copy']: - return [suffix] + apply_nlp(suffix) + suffix = strip_article(category[idx + 4 :]) + if prefix in ["pair", "copy", "base", "fragments", "figure", "copy"]: + return [suffix, *apply_nlp(suffix)] else: - return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix) - elif ' from ' in category: - idx = category.find(' from ') + return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)] + elif " from " in category: + idx = category.find(" from ") prefix = strip_article(category[:idx]) - suffix = strip_article(category[idx + 4:]) - if prefix in ['pair', 'copy', 'base', 'fragments', 'figure', 'copy']: - return [suffix] + apply_nlp(suffix) + suffix = strip_article(category[idx + 6 :]) + if prefix in ["pair", "copy", "base", "fragments", "figure", "copy"]: + return [suffix, 
*apply_nlp(suffix)] else: - return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix) - elif '&' in category: - categories = [strip_article(c) for c in category.split('&')] + return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)] + elif "&" in category: + categories = [strip_article(c) for c in category.split("&")] for cat in list(categories): categories = categories + apply_nlp(cat) return categories - elif ' and ' in category or ',' in category: + elif " and " in category or "," in category: categories = [] - while ' and ' in category or ',' in category: - and_idx = category.find(' and ') - comma_idx = category.find(',') + while " and " in category or "," in category: + and_idx = category.find(" and ") + comma_idx = category.find(",") if and_idx >= 0 and comma_idx >= 0: idx = min(and_idx, comma_idx) elif and_idx >= 0: @@ -106,65 +106,69 @@ def apply_nlp(category): idx = -1 if idx >= 0: categories.append(strip_article(category[:idx])) - if category[idx] == ',': - category = category[idx + 1:] + if category[idx] == ",": + category = category[idx + 1 :] else: - category = category[idx + 5:] - if category.strip().strip('()[]'): - categories.append(strip_article(category.strip().strip('()[]'))) + category = category[idx + 5 :] + if category.strip().strip("()[]"): + categories.append(strip_article(category.strip().strip("()[]"))) for cat in list(categories): categories = categories + apply_nlp(cat) return categories - elif ' or ' in category: + elif " or " in category: categories = [] - while ' or ' in category: - idx = category.find(' or ') + while " or " in category: + idx = category.find(" or ") if idx >= 0: categories.append(strip_article(category[:idx])) - category = category[idx + 4:].strip().strip('()[]') - if category.strip().strip('()[]'): + category = category[idx + 4 :].strip().strip("()[]") + if category.strip().strip("()[]"): categories.append(strip_article(category)) for cat in list(categories): categories = categories + apply_nlp(cat) 
return categories else: categories = category.split() - return [' '.join(categories[-idx:]) for idx in range(len(categories) - 1, 0, -1)] + return [" ".join(categories[-idx:]) for idx in range(len(categories) - 1, 0, -1)] else: return [] -def apply_aat(category, merge=True): +def apply_aat(category, merge=True): # noqa: FBT002 """Expand the category using the AAT.""" - if os.path.exists('aat.json'): - with open('aat.json') as in_f: + if os.path.exists("aat.json"): + with open("aat.json") as in_f: cache = json.load(in_f) else: cache = {} if category not in cache: cache[category] = [] - response = requests.get('http://vocabsservices.getty.edu/AATService.asmx/AATGetTermMatch', - params=[('term', f'"{category}"'), - ('logop', 'and'), - ('notes', '')]) - if response.status_code == 200: - subjects = etree.fromstring(response.content).xpath('Subject/Subject_ID/text()') + response = requests.get( + "http://vocabsservices.getty.edu/AATService.asmx/AATGetTermMatch", + params=[("term", f'"{category}"'), ("logop", "and"), ("notes", "")], + timeout=300, + ) + if response.status_code == 200: # noqa: PLR2004 + subjects = etree.fromstring(response.content).xpath("Subject/Subject_ID/text()") # noqa: S320 hierarchies = [] for subject in subjects: - response2 = requests.get('http://vocabsservices.getty.edu/AATService.asmx/AATGetSubject', - params=[('subjectID', subject)]) - if response.status_code == 200: - hierarchy_text = etree.fromstring(response2.content).xpath('Subject/Hierarchy/text()') + response2 = requests.get( + "http://vocabsservices.getty.edu/AATService.asmx/AATGetSubject", + params=[("subjectID", subject)], + timeout=300, + ) + if response2.status_code == 200: # noqa: PLR2004 + hierarchy_text = etree.fromstring(response2.content).xpath("Subject/Hierarchy/text()") # noqa: S320 if hierarchy_text: hierarchy = [] - for entry in [h.strip() for h in hierarchy_text[0].split('|') if '<' not in h]: - entry = entry.lower() - if '(' in entry: - entry = 
entry[:entry.find('(')].strip() - if entry.endswith(' facet'): - entry = entry[:entry.find(' facet')].strip() - if entry.endswith(' genres'): - entry = entry[:entry.find(' genres')].strip() + for entry in [h.strip() for h in hierarchy_text[0].split("|") if "<" not in h]: + entry = entry.lower() # noqa: PLW2901 + if "(" in entry: + entry = entry[: entry.find("(")].strip() # noqa: PLW2901 + if entry.endswith(" facet"): + entry = entry[: entry.find(" facet")].strip() # noqa: PLW2901 + if entry.endswith(" genres"): + entry = entry[: entry.find(" genres")].strip() # noqa: PLW2901 if entry not in hierarchy: hierarchy.append(entry) hierarchies.append(hierarchy) @@ -172,11 +176,11 @@ def apply_aat(category, merge=True): for hierarchy in hierarchies: for start in range(0, len(hierarchy)): if hierarchy[start] not in cache: - if hierarchy[start + 1:]: - cache[hierarchy[start]] = [hierarchy[start + 1:]] + if hierarchy[start + 1 :]: + cache[hierarchy[start]] = [hierarchy[start + 1 :]] else: cache[hierarchy[start]] = [] - with open('aat.json', 'w') as out_f: + with open("aat.json", "w") as out_f: json.dump(cache, out_f) if merge: if len(cache[category]) > 1: @@ -204,16 +208,16 @@ async def expand_categories_impl(config): async with create_sessionmaker(config)() as dbsession: count = await dbsession.execute(select(func.count(Item.id))) result = await dbsession.execute(select(Item)) - with click.progressbar(result.scalars(), length=count.scalar_one(), label='Expanding categories') as progress: + with click.progressbar(result.scalars(), length=count.scalar_one(), label="Expanding categories") as progress: for item in progress: - categories = [c.lower() for c in item.attributes[config['data']['hierarchy']['field']]] - if 'nlp' in config['data']['hierarchy']['expansions']: - for category in item.attributes[config['data']['hierarchy']['field']]: + categories = [c.lower() for c in item.attributes[config["data"]["hierarchy"]["field"]]] + if "nlp" in 
config["data"]["hierarchy"]["expansions"]: + for category in item.attributes[config["data"]["hierarchy"]["field"]]: categories = categories + apply_nlp(category.lower()) - if 'aat' in config['data']['hierarchy']['expansions']: + if "aat" in config["data"]["hierarchy"]["expansions"]: for category in list(categories): categories = categories + apply_aat(category) - item.attributes['_categories'] = categories + item.attributes["_categories"] = categories await dbsession.commit() @@ -221,41 +225,44 @@ async def expand_categories_impl(config): @click.pass_context def expand_categories(ctx): """Expand the object categories.""" - asyncio.run(expand_categories_impl(ctx.obj['config'])) + asyncio.run(expand_categories_impl(ctx.obj["config"])) async def generate_topic_vectors_impl(config): """Generate topic vectors for all items.""" async with create_sessionmaker(config)() as dbsession: - async def texts(dictionary=None, label=''): + + async def texts(dictionary=None, label=""): count = await dbsession.execute(select(func.count(Item.id))) result = await dbsession.execute(select(Item)) with click.progressbar(result.scalars(), length=count.scalar_one(), label=label) as progress: for item in progress: - if '_tokens' in item.attributes: + if "_tokens" in item.attributes: if dictionary: - yield dictionary.doc2bow(item.attributes['_tokens']) + yield dictionary.doc2bow(item.attributes["_tokens"]) else: - yield item.attributes['_tokens'] + yield item.attributes["_tokens"] dictionary = corpora.Dictionary() - async for tokens in texts(label='Generating dictionary'): + async for tokens in texts(label="Generating dictionary"): dictionary.add_documents([tokens]) dictionary.filter_extremes(keep_n=None) corpus = [] - async for bow in texts(dictionary=dictionary, label='Generating corpus'): + async for bow in texts(dictionary=dictionary, label="Generating corpus"): corpus.append(bow) - waiting = ClickIndeterminate('Generating model') + waiting = ClickIndeterminate("Generating model") 
waiting.start() model = models.LdaModel(corpus, num_topics=300, id2word=dictionary, update_every=0) waiting.stop() count = await dbsession.execute(select(func.count(Item.id))) result = await dbsession.execute(select(Item)) - with click.progressbar(result.scalars(), length=count.scalar_one(), label='Generating topic vectors') as progress: + with click.progressbar( + result.scalars(), length=count.scalar_one(), label="Generating topic vectors" + ) as progress: for item in progress: - if '_tokens' in item.attributes: - vec = model[dictionary.doc2bow(item.attributes['_tokens'])] - item.attributes['lda_vector'] = [(wid, float(prob)) for wid, prob in vec] + if "_tokens" in item.attributes: + vec = model[dictionary.doc2bow(item.attributes["_tokens"])] + item.attributes["lda_vector"] = [(wid, float(prob)) for wid, prob in vec] await dbsession.commit() @@ -263,7 +270,7 @@ async def texts(dictionary=None, label=''): @click.pass_context def generate_topic_vectors(ctx): """Generate topic vectors for all items.""" - asyncio.run(generate_topic_vectors_impl(ctx.obj['config'])) + asyncio.run(generate_topic_vectors_impl(ctx.obj["config"])) async def pipeline_impl(config): @@ -272,11 +279,13 @@ async def pipeline_impl(config): await tokenise_impl(config) await generate_topic_vectors_impl(config) + @click.command() @click.pass_context def pipeline(ctx): """Run the items processing pipeline.""" - asyncio.run(pipeline_impl(ctx.obj['config'])) + asyncio.run(pipeline_impl(ctx.obj["config"])) + @click.group() def items(): diff --git a/museum_map/cli/layout.py b/museum_map/cli/layout.py index d085e2b..32a224b 100644 --- a/museum_map/cli/layout.py +++ b/museum_map/cli/layout.py @@ -1,17 +1,17 @@ import asyncio -import click import math - from copy import deepcopy +from random import choice + +import click from inflection import pluralize -from random import sample, choice from scipy.spatial.distance import cosine -from sqlalchemy import func, delete +from sqlalchemy import delete, func from 
sqlalchemy.future import select from sqlalchemy.orm import selectinload -from .groups import fill_vector -from ..models import Item, Group, Room, Floor, FloorTopic, create_sessionmaker +from museum_map.cli.groups import fill_vector +from museum_map.models import Floor, FloorTopic, Group, Item, Room, create_sessionmaker async def count_items(dbsession, group): @@ -34,7 +34,9 @@ def walk(node): for child in node.children: walk(child) - stmt = select(Group).options(selectinload(Group.parent), selectinload(Group.children), selectinload(Group.items), selectinload(Group.room)) + stmt = select(Group).options( + selectinload(Group.parent), selectinload(Group.children), selectinload(Group.items), selectinload(Group.room) + ) result = await dbsession.execute(stmt) for root in result.scalars(): if root.parent is None: @@ -45,29 +47,29 @@ def walk(node): def pluralize_label(label): """Pluralise the label.""" - if ' ' in label: - if ' - ' in label: - parts = label.split(' - ') + if " " in label: + if " - " in label: + parts = label.split(" - ") parts[0] = pluralize_label(parts[0]) - label = ' - '.join(parts) - elif ' of ' in label: - part = label[:label.find(' of ')] + label = " - ".join(parts) + elif " of " in label: + part = label[: label.find(" of ")] label = f'{pluralize_label(part)}{label[label.find(" of "):]}' - elif ' for ' in label: - part = label[:label.find(' for ')] + elif " for " in label: + part = label[: label.find(" for ")] label = f'{pluralize_label(part)}{label[label.find(" for "):]}' - elif ' and ' in label: - part1 = label[:label.find(' and ')] - part2 = label[label.find(' and ') + 5:] - label = f'{pluralize_label(part1)} and {pluralize_label(part2)}' - elif ' or ' in label: - part1 = label[:label.find(' or ')] - part2 = label[label.find(' or ') + 4:] - label = f'{pluralize_label(part1)} or {pluralize_label(part2)}' + elif " and " in label: + part1 = label[: label.find(" and ")] + part2 = label[label.find(" and ") + 5 :] + label = f"{pluralize_label(part1)} 
and {pluralize_label(part2)}" + elif " or " in label: + part1 = label[: label.find(" or ")] + part2 = label[label.find(" or ") + 4 :] + label = f"{pluralize_label(part1)} or {pluralize_label(part2)}" else: - parts = label.split(' ') + parts = label.split(" ") parts[-1] = pluralize(parts[-1]) - label = ' '.join(parts) + label = " ".join(parts) else: label = pluralize(label) return label @@ -79,16 +81,20 @@ async def generate_rooms(dbsession, floor, nr, room_ids, rooms, assigned): rid = room_ids.pop() room = rooms[rid] splits_left = 1 # room['max_splits'] - items_left = room['items'] + items_left = room["items"] for group in await get_assignable_groups(dbsession, assigned): if items_left >= len(group.items) and splits_left > 0: label = pluralize_label(group.label) - dbsession.add(Room(number=f'{floor.level}.{nr}', - label=label, - group=group, - floor=floor, - items=group.items, - position=room['position'])) + dbsession.add( + Room( + number=f"{floor.level}.{nr}", + label=label, + group=group, + floor=floor, + items=group.items, + position=room["position"], + ) + ) items_left = items_left - len(group.items) splits_left = splits_left - 1 assigned.append(group.id) @@ -100,19 +106,19 @@ async def generate_rooms(dbsession, floor, nr, room_ids, rooms, assigned): async def generate_structure_impl(config): """Generate the floors and rooms structure.""" async with create_sessionmaker(config)() as dbsession: - room_ids = [room['id'] for room in config['layout']['rooms']] + room_ids = [room["id"] for room in config["layout"]["rooms"]] room_ids.reverse() - rooms = dict([(room['id'], room) for room in config['layout']['rooms']]) + rooms = {room["id"]: room for room in config["layout"]["rooms"]} assigned = [] assignable = await get_assignable_groups(dbsession, assigned) old_len = len(assignable) floor_nr = -1 - progress = click.progressbar(length=len(assignable), label='Generating layout') + progress = click.progressbar(length=len(assignable), label="Generating layout") 
progress.update(0) while assignable: floor_nr = floor_nr + 1 - floor = Floor(label = f'Floor {floor_nr}', level=floor_nr) + floor = Floor(label=f"Floor {floor_nr}", level=floor_nr) dbsession.add(floor) await generate_rooms(dbsession, floor, 1, deepcopy(room_ids), rooms, assigned) assignable = await get_assignable_groups(dbsession, assigned) @@ -125,12 +131,12 @@ async def generate_structure_impl(config): @click.pass_context def generate_structure(ctx): """Generate the floors and rooms structure.""" - asyncio.run(generate_structure_impl(ctx.obj['config'])) + asyncio.run(generate_structure_impl(ctx.obj["config"])) def get_basic_group(group): """Find the first basic group""" - if group.split == 'basic': + if group.split == "basic": return group else: return get_basic_group(group.parent) @@ -147,7 +153,7 @@ def depth(group): def get_ancestors(group): """Determine all the ancestors of a group.""" if group.parent: - return [group.parent] + get_ancestors(group.parent) + return [group.parent, *get_ancestors(group.parent)] else: return [] @@ -156,27 +162,35 @@ async def summarise_rooms(dbsession): """Generate the room summaries.""" rooms = await dbsession.execute(select(Room).options(selectinload(Room.items))) rooms_count = await dbsession.execute(select(func.count(Room.id))) - with click.progressbar(rooms.scalars(), length=rooms_count.scalar_one(), label='Generating room summaries') as progress: + with click.progressbar( + rooms.scalars(), length=rooms_count.scalar_one(), label="Generating room summaries" + ) as progress: for room in progress: - room.sample = choice(room.items) + room.sample = choice(room.items) # noqa: S311 dbsession.add(room) await dbsession.commit() async def summarise_floors(dbsession): """Generate the floor summaries.""" - floors = await dbsession.execute(select(Floor).options(selectinload(Floor.topics), selectinload(Floor.rooms), selectinload(Floor.samples))) + floors = await dbsession.execute( + select(Floor).options(selectinload(Floor.topics), 
selectinload(Floor.rooms), selectinload(Floor.samples)) + ) floors_count = await dbsession.execute(select(func.count(Floor.id))) - with click.progressbar(floors.scalars(), length=floors_count.scalar_one(), label='Generating floor summaries') as progress: + with click.progressbar( + floors.scalars(), length=floors_count.scalar_one(), label="Generating floor summaries" + ) as progress: for floor in progress: floor_groups = {} if len(floor.topics) == 0: - groups = await dbsession.execute(select(Group).join(Group.room).filter(Room.floor_id == floor.id).options(selectinload(Group.items))) + groups = await dbsession.execute( + select(Group).join(Group.room).filter(Room.floor_id == floor.id).options(selectinload(Group.items)) + ) for group in groups.scalars(): size = await count_items(dbsession, group) - while group.split in ['time', 'similar', 'attribute', 'inner']: + while group.split in ["time", "similar", "attribute", "inner"]: parent_result = await dbsession.execute(select(Group).filter(Group.id == group.parent_id)) - group = parent_result.scalar_one() + group = parent_result.scalar_one() # noqa: PLW2901 if group in floor_groups: floor_groups[group] = floor_groups[group] + size else: @@ -188,9 +202,11 @@ async def summarise_floors(dbsession): for group, size in group_sizes: sub_total = sub_total + size dbsession.add(FloorTopic(label=pluralize_label(group.label), group=group, floor=floor, size=size)) - if sub_total / total > 0.66666: + if sub_total / total > 0.66666: # noqa: PLR2004 break - items_result = await dbsession.execute(select(Item).filter(Item.room_id.in_([room.id for room in floor.rooms]))) + items_result = await dbsession.execute( + select(Item).filter(Item.room_id.in_([room.id for room in floor.rooms])) + ) items = list(items_result.scalars()) floor.samples = [items[idx] for idx in range(0, len(items), math.floor(len(items) / 15))] await dbsession.commit() @@ -209,7 +225,7 @@ async def generate_summaries_impl(config): @click.pass_context def 
generate_summaries(ctx): """Generate the floor and room summaries""" - asyncio.run(generate_summaries_impl(ctx.obj['config'])) + asyncio.run(generate_summaries_impl(ctx.obj["config"])) async def order_items_impl(config): @@ -218,7 +234,9 @@ async def order_items_impl(config): result = await dbsession.execute(stmt) stmt_count = select(func.count(Room.id)) result_count = await dbsession.execute(stmt_count) - with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Ordering items in rooms') as progress: + with click.progressbar( + result.scalars(), length=result_count.scalar_one(), label="Ordering items in rooms" + ) as progress: for room in progress: vectors = {} sorted_items = [] @@ -247,7 +265,7 @@ async def order_items_impl(config): @click.pass_context def order_items(ctx): """Order the items in each room""" - asyncio.run(order_items_impl(ctx.obj['config'])) + asyncio.run(order_items_impl(ctx.obj["config"])) async def pipeline_impl(config): @@ -261,7 +279,7 @@ async def pipeline_impl(config): @click.pass_context def pipeline(ctx): """Run the layout pipeline.""" - asyncio.run(pipeline_impl(ctx.obj['config'])) + asyncio.run(pipeline_impl(ctx.obj["config"])) @click.group() diff --git a/museum_map/cli/search.py b/museum_map/cli/search.py index 79460e9..d2626f7 100644 --- a/museum_map/cli/search.py +++ b/museum_map/cli/search.py @@ -1,6 +1,6 @@ import asyncio -import click +import click from meilisearch_python_async import Client from meilisearch_python_async.models.settings import Faceting from meilisearch_python_async.task import wait_for_task @@ -8,46 +8,48 @@ from sqlalchemy.future import select from sqlalchemy.orm import selectinload -from .util import ClickIndeterminate -from ..models import Room, create_sessionmaker +from museum_map.cli.util import ClickIndeterminate +from museum_map.models import Room, create_sessionmaker async def index_impl(config): """The actual indexing implementation.""" async with create_sessionmaker(config)() as 
dbsession: - async with Client(config['search']['url'], config['search']['key']) as client: + async with Client(config["search"]["url"], config["search"]["key"]) as client: try: - index = await client.get_index('items') + index = await client.get_index("items") task = await index.delete() await wait_for_task(client, task.task_uid, timeout_in_ms=None) - except Exception: + except Exception: # noqa: S110 pass - items_idx = await client.create_index('items', primary_key='mmap_id') + items_idx = await client.create_index("items", primary_key="mmap_id") stmt = select(Room).options(selectinload(Room.items)) result = await dbsession.execute(stmt) stmt_count = select(func.count(Room.id)) result_count = await dbsession.execute(stmt_count) docs = [] - with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Generating rooms documents') as progress: + with click.progressbar( + result.scalars(), length=result_count.scalar_one(), label="Generating rooms documents" + ) as progress: for room in progress: for item in room.items: doc = { - 'mmap_id': item.id, - 'mmap_room': room.id, - 'mmap_floor': room.floor_id, + "mmap_id": item.id, + "mmap_room": room.id, + "mmap_floor": room.floor_id, } doc.update(item.attributes) docs.append(doc) tasks = await items_idx.add_documents_in_batches(docs) - with click.progressbar(tasks, label='Waiting for indexing to complete') as progress: + with click.progressbar(tasks, label="Waiting for indexing to complete") as progress: for task in progress: await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000) - progress = ClickIndeterminate('Updating filterable attributes') + progress = ClickIndeterminate("Updating filterable attributes") progress.start() - task = await items_idx.update_filterable_attributes(['mmap_room', 'mmap_floor']) + task = await items_idx.update_filterable_attributes(["mmap_room", "mmap_floor"]) await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000) 
progress.stop() - progress = ClickIndeterminate('Updating faceting settings') + progress = ClickIndeterminate("Updating faceting settings") progress.start() task = await items_idx.update_faceting(Faceting(max_values_per_facet=1000)) await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000) @@ -58,7 +60,7 @@ async def index_impl(config): @click.pass_context def index(ctx): """Index the data""" - asyncio.run(index_impl(ctx.obj['config'])) + asyncio.run(index_impl(ctx.obj["config"])) async def pipeline_impl(config): diff --git a/museum_map/cli/server.py b/museum_map/cli/server.py index 643e7aa..199198d 100644 --- a/museum_map/cli/server.py +++ b/museum_map/cli/server.py @@ -1,19 +1,17 @@ -import click - from importlib import resources -from sqlalchemy.ext.asyncio import create_async_engine + +import click from tornado.ioloop import IOLoop from tornado.web import Application, StaticFileHandler -from ..models import create_engine -from ..server.handlers import ( +from museum_map.server.handlers import ( APICollectionHandler, APIConfigHandler, APIItemHandler, APIPickHandler, - FrontendHandler, - APIStatusHandler, APISearchHandler, + APIStatusHandler, + FrontendHandler, create_inject_item_html, ) @@ -21,29 +19,29 @@ @click.command() @click.pass_context def run(ctx): - config = ctx.obj['config'] + config = ctx.obj["config"] app = Application( [ - ('/api', APIStatusHandler), - ('/api/picks/([a-z\-]+)', APIPickHandler), - ('/api/config/all', APIConfigHandler, {'config': config}), - ('/api/search', APISearchHandler), - ('/api/([a-z\-]+)', APICollectionHandler), - ('/api/([a-z\-]+)/([0-9]+)', APIItemHandler), - ('/images/(.*)', StaticFileHandler, {'path': config['images']['basepath']}), + ("/api", APIStatusHandler), + (r"/api/picks/([a-z\-]+)", APIPickHandler), + ("/api/config/all", APIConfigHandler, {"config": config}), + ("/api/search", APISearchHandler), + (r"/api/([a-z\-]+)", APICollectionHandler), + (r"/api/([a-z\-]+)/([0-9]+)", APIItemHandler), 
+ ("/images/(.*)", StaticFileHandler, {"path": config["images"]["basepath"]}), ( - '/(.*)', + "/(.*)", FrontendHandler, { - 'base': resources.files('museum_map') / 'server' / 'frontend' / 'public', - 'html_injectors': {r'room/([0-9]+)/([0-9]+)': create_inject_item_html(config)}, + "base": resources.files("museum_map") / "server" / "frontend" / "public", + "html_injectors": {r"room/([0-9]+)/([0-9]+)": create_inject_item_html(config)}, }, ), ], autoreload=True, config=config, ) - app.listen(config['server']['port'], address=config['server']['host']) + app.listen(config["server"]["port"], address=config["server"]["host"]) IOLoop.current().start() diff --git a/museum_map/cli/util.py b/museum_map/cli/util.py index f8e893c..1fc3cc8 100644 --- a/museum_map/cli/util.py +++ b/museum_map/cli/util.py @@ -1,9 +1,9 @@ """Utility functionality for the cli.""" -import click - from threading import Thread from time import sleep +import click + class ClickIndeterminate(Thread): """A thread that shows a indeterminate busy animation using the cli.""" @@ -15,16 +15,16 @@ def __init__(self, label): def run(self): """Run the animation sequence.""" - anim = ['\u28fe', '\u28f7', '\u28ef', '\u28df', '\u287f', '\u28bf', '\u28fb', '\u28fd'] + anim = ["\u28fe", "\u28f7", "\u28ef", "\u28df", "\u287f", "\u28bf", "\u28fb", "\u28fd"] anim.reverse() self._active = True - click.echo(f'{self._label} ', nl=False) + click.echo(f"{self._label} ", nl=False) while self._active: - click.echo(f'\b{anim[-1]}', nl=False) + click.echo(f"\b{anim[-1]}", nl=False) anim.insert(0, anim.pop()) sleep(0.15) def stop(self): """Stop the animation sequence.""" self._active = False - click.echo(f'\b\u2713') + click.echo("\b\u2713") diff --git a/museum_map/models/__init__.py b/museum_map/models/__init__.py index 3aacbad..7ca6020 100644 --- a/museum_map/models/__init__.py +++ b/museum_map/models/__init__.py @@ -1,30 +1,32 @@ """Database models.""" -from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine, 
AsyncSession -from sqlalchemy.orm import sessionmaker +from collections.abc import Callable -from typing import Callable +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine +from sqlalchemy.orm import sessionmaker from .base import Base # noqa -from .item import Item # noqa +from .floor import Floor, FloorTopic # noqa from .group import Group # noqa +from .item import Item # noqa from .room import Room # noqa -from .floor import Floor, FloorTopic # noqa - engine = None + def create_engine(config) -> AsyncEngine: """Get a new singleton DB engine.""" - global engine + global engine # noqa: PLW0603 if engine is None: - engine = create_async_engine(config['db']['dsn']) + engine = create_async_engine(config["db"]["dsn"]) return engine async_sessionmaker = None + + def create_sessionmaker(config) -> Callable[[], AsyncSession]: """Get a new singleton DB session maker.""" - global async_sessionmaker + global async_sessionmaker # noqa: PLW0603 if async_sessionmaker is None: async_sessionmaker = sessionmaker(create_engine(config), expire_on_commit=False, class_=AsyncSession) return async_sessionmaker diff --git a/museum_map/models/base.py b/museum_map/models/base.py index 6e8891e..aa6409a 100644 --- a/museum_map/models/base.py +++ b/museum_map/models/base.py @@ -2,11 +2,11 @@ from sqlalchemy.ext.declarative import declarative_base NAMING_CONVENTION = { - "ix": 'ix_%(column_0_label)s', + "ix": "ix_%(column_0_label)s", "uq": "uq_%(table_name)s_%(column_0_name)s", "ck": "ck_%(table_name)s_%(constraint_name)s", "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", - "pk": "pk_%(table_name)s" + "pk": "pk_%(table_name)s", } metadata = MetaData(naming_convention=NAMING_CONVENTION) diff --git a/museum_map/models/floor.py b/museum_map/models/floor.py index a001e3b..2034885 100644 --- a/museum_map/models/floor.py +++ b/museum_map/models/floor.py @@ -1,102 +1,67 @@ -from sqlalchemy import (Table, Column, Integer, Unicode, UnicodeText, 
ForeignKey, Index) +from sqlalchemy import Column, ForeignKey, Index, Integer, Table, Unicode from sqlalchemy.orm import relationship -from sqlalchemy_json import NestedMutableJson -from .base import Base +from museum_map.models.base import Base - -floors_items = Table('floors_items', Base.metadata, - Column('floor_id', Integer, ForeignKey('floors.id')), - Column('item_id', Integer, ForeignKey('items.id'))) +floors_items = Table( + "floors_items", + Base.metadata, + Column("floor_id", Integer, ForeignKey("floors.id")), + Column("item_id", Integer, ForeignKey("items.id")), +) class Floor(Base): + __tablename__ = "floors" - __tablename__ = 'floors' - - id = Column(Integer, primary_key=True) + id = Column(Integer, primary_key=True) # noqa: A003 label = Column(Unicode(255)) level = Column(Integer) - rooms = relationship('Room', back_populates='floor') - samples = relationship('Item', secondary=floors_items) - topics = relationship('FloorTopic', back_populates='floor') + rooms = relationship("Room", back_populates="floor") + samples = relationship("Item", secondary=floors_items) + topics = relationship("FloorTopic", back_populates="floor") def as_jsonapi(self): return { - 'type': 'floors', - 'id': str(self.id), - 'attributes': { - 'label': self.label, - 'level': self.level, + "type": "floors", + "id": str(self.id), + "attributes": { + "label": self.label, + "level": self.level, + }, + "relationships": { + "rooms": {"data": [{"type": "rooms", "id": str(room.id)} for room in self.rooms]}, + "samples": {"data": [{"type": "items", "id": str(item.id)} for item in self.samples]}, + "topics": {"data": [{"type": "floor-topics", "id": str(topic.id)} for topic in self.topics]}, }, - 'relationships': { - 'rooms': { - 'data': [ - { - 'type': 'rooms', - 'id': str(room.id) - } - for room in self.rooms - ] - }, - 'samples': { - 'data': [ - { - 'type': 'items', - 'id': str(item.id) - } - for item in self.samples - ] - }, - 'topics': { - 'data': [ - { - 'type': 'floor-topics', - 'id': 
str(topic.id) - } - for topic in self.topics - ] - } - } } class FloorTopic(Base): + __tablename__ = "floor_topics" - __tablename__ = 'floor_topics' - - id = Column(Integer, primary_key=True) - group_id = Column(Integer, ForeignKey('groups.id')) - floor_id = Column(Integer, ForeignKey('floors.id')) + id = Column(Integer, primary_key=True) # noqa: A003 + group_id = Column(Integer, ForeignKey("groups.id")) + floor_id = Column(Integer, ForeignKey("floors.id")) label = Column(Unicode(255)) size = Column(Integer) - group = relationship('Group') - floor = relationship('Floor', back_populates='topics') + group = relationship("Group") + floor = relationship("Floor", back_populates="topics") def as_jsonapi(self): return { - 'type': 'floor-topics', - 'id': str(self.id), - 'attributes': { - 'label': self.label, - 'size': self.size, + "type": "floor-topics", + "id": str(self.id), + "attributes": { + "label": self.label, + "size": self.size, + }, + "relationships": { + "group": {"data": {"type": "groups", "id": str(self.group_id)}}, + "floor": {"data": {"type": "floors", "id": str(self.floor_id)}}, }, - 'relationships': { - 'group': { - 'data': { - 'type': 'groups', - 'id': str(self.group_id) - } - }, - 'floor': { - 'data': { - 'type': 'floors', - 'id': str(self.floor_id) - } - } - } } diff --git a/museum_map/models/group.py b/museum_map/models/group.py index 957567f..1e1ac5e 100644 --- a/museum_map/models/group.py +++ b/museum_map/models/group.py @@ -1,34 +1,32 @@ -from sqlalchemy import (Column, Integer, Unicode, ForeignKey, Index) +from sqlalchemy import Column, ForeignKey, Index, Integer, Unicode from sqlalchemy.orm import relationship -from sqlalchemy_json import NestedMutableJson -from .base import Base +from museum_map.models.base import Base class Group(Base): + __tablename__ = "groups" - __tablename__ = 'groups' - - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('groups.id')) + id = Column(Integer, primary_key=True) # noqa: A003 + 
parent_id = Column(Integer, ForeignKey("groups.id")) value = Column(Unicode(255)) label = Column(Unicode(255)) split = Column(Unicode(64)) - parent = relationship('Group', remote_side=[id], back_populates='children', uselist=False) - children = relationship('Group', remote_side=[parent_id]) - items = relationship('Item', back_populates='group') - room = relationship('Room', back_populates='group', uselist=False) + parent = relationship("Group", remote_side=[id], back_populates="children", uselist=False) + children = relationship("Group", remote_side=[parent_id]) + items = relationship("Item", back_populates="group") + room = relationship("Room", back_populates="group", uselist=False) def as_jsonapi(self): return { - 'type': 'groups', - 'id': str(self.id), - 'attributes': { - 'value': self.value, - 'label': self.label, - 'split': self.split, - } + "type": "groups", + "id": str(self.id), + "attributes": { + "value": self.value, + "label": self.label, + "split": self.split, + }, } diff --git a/museum_map/models/item.py b/museum_map/models/item.py index 3dc2299..6708ada 100644 --- a/museum_map/models/item.py +++ b/museum_map/models/item.py @@ -1,37 +1,26 @@ -from sqlalchemy import (Column, Integer, ForeignKey, Index) +from sqlalchemy import Column, ForeignKey, Index, Integer from sqlalchemy.orm import relationship from sqlalchemy_json import NestedMutableJson -from .base import Base +from museum_map.models.base import Base class Item(Base): + __tablename__ = "items" - __tablename__ = 'items' - - id = Column(Integer, primary_key=True) - group_id = Column(Integer, ForeignKey('groups.id')) - room_id = Column(Integer, ForeignKey('rooms.id')) + id = Column(Integer, primary_key=True) # noqa: A003 + group_id = Column(Integer, ForeignKey("groups.id")) + room_id = Column(Integer, ForeignKey("rooms.id")) attributes = Column(NestedMutableJson) sequence = Column(Integer) - group = relationship('Group', back_populates='items') - room = relationship('Room', back_populates='items', 
primaryjoin='Item.room_id == Room.id') + group = relationship("Group", back_populates="items") + room = relationship("Room", back_populates="items", primaryjoin="Item.room_id == Room.id") def as_jsonapi(self): - data = { - 'type': 'items', - 'id': str(self.id), - 'attributes': self.attributes, - 'relationships': {} - } - if (self.room): - data['relationships']['room'] = { - 'data': { - 'type': 'rooms', - 'id': str(self.room_id) - } - } + data = {"type": "items", "id": str(self.id), "attributes": self.attributes, "relationships": {}} + if self.room: + data["relationships"]["room"] = {"data": {"type": "rooms", "id": str(self.room_id)}} return data diff --git a/museum_map/models/room.py b/museum_map/models/room.py index 6b1b4e5..f185571 100644 --- a/museum_map/models/room.py +++ b/museum_map/models/room.py @@ -1,64 +1,38 @@ -from sqlalchemy import (Column, Integer, Unicode, ForeignKey, Index) +from sqlalchemy import Column, ForeignKey, Index, Integer, Unicode from sqlalchemy.orm import relationship from sqlalchemy_json import NestedMutableJson -from .base import Base +from museum_map.models.base import Base class Room(Base): + __tablename__ = "rooms" - __tablename__ = 'rooms' - - id = Column(Integer, primary_key=True) - floor_id = Column(Integer, ForeignKey('floors.id')) - group_id = Column(Integer, ForeignKey('groups.id')) - item_id = Column(Integer, ForeignKey('items.id')) + id = Column(Integer, primary_key=True) # noqa: A003 + floor_id = Column(Integer, ForeignKey("floors.id")) + group_id = Column(Integer, ForeignKey("groups.id")) + item_id = Column(Integer, ForeignKey("items.id")) number = Column(Unicode(16)) label = Column(Unicode(255)) position = Column(NestedMutableJson) - group = relationship('Group', back_populates='room') - floor = relationship('Floor', back_populates='rooms') - sample = relationship('Item', primaryjoin='Room.item_id == Item.id') - items = relationship('Item', - back_populates='room', - order_by='Item.sequence', - primaryjoin='Room.id == 
Item.room_id') + group = relationship("Group", back_populates="room") + floor = relationship("Floor", back_populates="rooms") + sample = relationship("Item", primaryjoin="Room.item_id == Item.id") + items = relationship("Item", back_populates="room", order_by="Item.sequence", primaryjoin="Room.id == Item.room_id") def as_jsonapi(self): data = { - 'type': 'rooms', - 'id': str(self.id), - 'attributes': { - 'number': self.number, - 'label': self.label, - 'position': self.position + "type": "rooms", + "id": str(self.id), + "attributes": {"number": self.number, "label": self.label, "position": self.position}, + "relationships": { + "floor": {"data": {"type": "floors", "id": str(self.floor_id)}}, + "items": {"data": [{"type": "items", "id": str(item.id)} for item in self.items]}, }, - 'relationships': { - 'floor': { - 'data': { - 'type': 'floors', - 'id': str(self.floor_id) - } - }, - 'items': { - 'data': [ - { - 'type': 'items', - 'id': str(item.id) - } - for item in self.items - ] - } - } } if self.sample: - data['relationships']['sample'] = { - 'data': { - 'type': 'items', - 'id': str(self.sample.id) - } - } + data["relationships"]["sample"] = {"data": {"type": "items", "id": str(self.sample.id)}} return data diff --git a/museum_map/server/handlers.py b/museum_map/server/handlers.py index eeb7aed..c70bf2e 100644 --- a/museum_map/server/handlers.py +++ b/museum_map/server/handlers.py @@ -1,22 +1,17 @@ import logging import math import re - -from configparser import ConfigParser -from datetime import datetime -from importlib import resources +from datetime import datetime, timezone from importlib.abc import Traversable -from meilisearch_python_async import Client from mimetypes import guess_type -from random import randint -from sqlalchemy import select, func -from sqlalchemy.orm import selectinload, noload -from sqlalchemy.ext.asyncio import AsyncSession + +from meilisearch_python_async import Client +from sqlalchemy import func, select +from sqlalchemy.orm import 
noload, selectinload from tornado import web from museum_map.__about__ import __version__ -from ..models import create_sessionmaker, Floor, FloorTopic, Room, Group, Item - +from museum_map.models import Floor, FloorTopic, Group, Item, Room, create_sessionmaker logger = logging.getLogger(__name__) @@ -28,21 +23,21 @@ def setup_query(types, multi_load): multi_loader = selectinload else: multi_loader = noload - if types == 'rooms': + if types == "rooms": query = select(Room).options(selectinload(Room.floor), multi_loader(Room.items), selectinload(Room.sample)) class_ = Room - elif types == 'floors': + elif types == "floors": query = select(Floor).options( multi_loader(Floor.rooms), multi_loader(Floor.samples), multi_loader(Floor.topics) ) class_ = Floor - elif types == 'items': + elif types == "items": query = select(Item).options(selectinload(Item.room)) class_ = Item - elif types == 'floor-topics': + elif types == "floor-topics": query = select(FloorTopic).options(selectinload(FloorTopic.group), selectinload(FloorTopic.floor)) class_ = FloorTopic - elif types == 'groups': + elif types == "groups": query = select(Group) class_ = Group return (query, class_) @@ -50,54 +45,54 @@ def setup_query(types, multi_load): class APIStatusHandler(web.RequestHandler): async def get(self): - async with create_sessionmaker(self.application.settings['config'])() as session: + async with create_sessionmaker(self.application.settings["config"])(): ready = False - self.write({'version': __version__, 'ready': ready}) + self.write({"version": __version__, "ready": ready}) class RequestBase(web.RequestHandler): def setup_query(self, types): - return setup_query(types, not self.get_argument('relationships', 'true').lower() == 'false') + return setup_query(types, not self.get_argument("relationships", "true").lower() == "false") class APICollectionHandler(RequestBase): async def get(self, types): - async with create_sessionmaker(self.application.settings['config'])() as session: + async 
with create_sessionmaker(self.application.settings["config"])() as session: query, class_ = self.setup_query(types) if query is not None and class_ is not None: for key, values in self.request.arguments.items(): - if key.startswith('filter['): - column = key[key.find('[') + 1 : key.find(']')] - if values == '': + if key.startswith("filter["): + column = key[key.find("[") + 1 : key.find("]")] + if values == "": query = query.filter(getattr(class_, column).in_([])) else: for value in values: - value = value.decode() - if value == '': + value = value.decode() # noqa: PLW2901 + if value == "": query = query.filter(getattr(class_, column).in_([])) else: - split_values = [int(v) for v in value.split(',')] + split_values = [int(v) for v in value.split(",")] if len(split_values) == 1: query = query.filter(getattr(class_, column) == split_values[0]) else: query = query.filter(getattr(class_, column).in_(split_values)) result = await session.execute(query) items = [item.as_jsonapi() for item in result.unique().scalars()] - self.write({'data': items}) + self.write({"data": items}) else: self.send_error(status_code=404) class APIItemHandler(RequestBase): async def get(self, types, identifier): - async with create_sessionmaker(self.application.settings['config'])() as session: + async with create_sessionmaker(self.application.settings["config"])() as session: query, class_ = self.setup_query(types) if query is not None and class_ is not None: - query = query.filter(getattr(class_, 'id') == int(identifier)) + query = query.filter(class_.id == int(identifier)) item = (await session.execute(query)).scalars().first() if item is not None: - self.write({'data': item.as_jsonapi()}) + self.write({"data": item.as_jsonapi()}) else: self.send_error(status_code=404) else: @@ -110,39 +105,39 @@ def initialize(self, config: dict) -> None: async def get(self): attributes = { - 'intro': self._config['app']['intro'], - 'item': self._config['app']['item'], + "intro": 
self._config["app"]["intro"], + "item": self._config["app"]["item"], } - if 'footer' in self._config['app']: - for footer_location in ['center', 'right']: - if footer_location in self._config['app']['footer']: - if 'footer' not in attributes: - attributes['footer'] = {} - attributes['footer'][footer_location] = { - 'label': self._config['app']['footer'][footer_location]['label'] + if "footer" in self._config["app"]: + for footer_location in ["center", "right"]: + if footer_location in self._config["app"]["footer"]: + if "footer" not in attributes: + attributes["footer"] = {} + attributes["footer"][footer_location] = { + "label": self._config["app"]["footer"][footer_location]["label"] } - if 'url' in self._config['app']['footer'][footer_location]: - attributes['footer'][footer_location]['url'] = self._config['app']['footer'][footer_location][ - 'url' + if "url" in self._config["app"]["footer"][footer_location]: + attributes["footer"][footer_location]["url"] = self._config["app"]["footer"][footer_location][ + "url" ] - self.write({'data': {'id': 'all', 'type': 'configs', 'attributes': attributes}}) + self.write({"data": {"id": "all", "type": "configs", "attributes": attributes}}) class APIPickHandler(RequestBase): - async def get(self, type): - if type in ['random', 'todays']: - async with create_sessionmaker(self.application.settings['config'])() as session: - query, class_ = self.setup_query('items') + async def get(self, pick_type): + if pick_type in ["random", "todays"]: + async with create_sessionmaker(self.application.settings["config"])() as session: + query, class_ = self.setup_query("items") if query is not None and class_ is not None: - if type == 'random': + if pick_type == "random": query = query.order_by(func.random()).limit(12) - elif type == 'todays': + elif pick_type == "todays": total = (await session.execute(select(func.count()).select_from(class_))).scalars().first() - row_nr = (math.floor(datetime.utcnow().timestamp() / 86400) % total) + 1 - query 
= query.order_by(getattr(class_, 'id')).offset(row_nr).limit(1) + row_nr = (math.floor(datetime.now(tz=timezone.utc).timestamp() / 86400) % total) + 1 + query = query.order_by(class_.id).offset(row_nr).limit(1) result = await session.execute(query) items = [item.as_jsonapi() for item in result.scalars()] - self.write({'data': items}) + self.write({"data": items}) else: self.send_error(status_code=404) else: @@ -150,27 +145,27 @@ async def get(self, type): class APISearchHandler(RequestBase): - def initialize(self: 'APISearchHandler'): + def initialize(self: "APISearchHandler"): self._client = Client( - self.application.settings['config']['search']['url'], self.application.settings['config']['search']['key'] + self.application.settings["config"]["search"]["url"], self.application.settings["config"]["search"]["key"] ) self._index = None - async def get(self: 'APISearchHandler'): + async def get(self: "APISearchHandler"): if self._index is None: - self._index = await self._client.get_index('items') + self._index = await self._client.get_index("items") result = await self._index.search( - self.get_argument('q'), + self.get_argument("q"), limit=150, - facets=['mmap_room', 'mmap_floor'], + facets=["mmap_room", "mmap_floor"], filter=[f'mmap_room = {self.get_argument("room")}'] - if self.get_argument('room', default=None) != None + if self.get_argument("room", default=None) is not None else [], ) self.write( { - 'hits': result.hits, - 'facetDistribution': result.facet_distribution, + "hits": result.hits, + "facetDistribution": result.facet_distribution, } ) @@ -178,7 +173,7 @@ async def get(self: 'APISearchHandler'): class FrontendHandler(web.RedirectHandler): """Handler for the frontend application files.""" - def initialize(self: 'FrontendHandler', base: Traversable, html_injectors: dict = None) -> None: + def initialize(self: "FrontendHandler", base: Traversable, html_injectors: dict | None = None) -> None: """Initialise the frontend handler.""" self._base = base if 
html_injectors: @@ -186,25 +181,25 @@ def initialize(self: 'FrontendHandler', base: Traversable, html_injectors: dict else: self._html_injectors = {} - async def get(self: 'FrontendHandler', path: str) -> None: + async def get(self: "FrontendHandler", path: str) -> None: """Get the file at the given path. :param path: The path to get. :type: path: str """ - self.xsrf_token + self.xsrf_token # noqa: B018 if not path.strip(): - path = '/' + path = "/" try: - logger.debug(f'Attempting to send {path}') - await self._get_resource(self._base, path.split('/')) + logger.debug(f"Attempting to send {path}") + await self._get_resource(self._base, path.split("/")) except FileNotFoundError: - logger.debug('Sending index.html') - await self._get_resource(self._base, ('index.html',), orig_path=path) + logger.debug("Sending index.html") + await self._get_resource(self._base, ("index.html",), orig_path=path) async def _get_resource( - self: 'FrontendHandler', resource: Traversable, path: list[str], orig_path: str = None - ) -> None: # noqa: E501 + self: "FrontendHandler", resource: Traversable, path: list[str], orig_path: str | None = None + ) -> None: """Send a file. Performs mimetype guessing and sets the appropriate Content-Type header. 
@@ -224,16 +219,16 @@ async def _get_resource( for key, injector in self._html_injectors.items(): match = re.match(key, orig_path) if match: - html = data.decode('utf-8') - split_idx = html.find('') - html = f'{html[:split_idx]}{await injector(*match.groups())}{html[split_idx:]}' - data = html.encode('utf-8') + html = data.decode("utf-8") + split_idx = html.find("") + html = f"{html[:split_idx]}{await injector(*match.groups())}{html[split_idx:]}" + data = html.encode("utf-8") mimetype = guess_type(path[-1]) if mimetype and mimetype[0]: - self.set_header('Content-Type', mimetype[0]) + self.set_header("Content-Type", mimetype[0]) self.write(data) - except IsADirectoryError: - raise FileNotFoundError() + except IsADirectoryError as err: + raise FileNotFoundError() from err def create_inject_item_html(config): @@ -242,11 +237,11 @@ def create_inject_item_html(config): async def inject_item_html(room_id: str, joke_id: str) -> str: try: async with create_sessionmaker(config)() as session: - query, class_ = setup_query('items', False) - query = query.filter(getattr(class_, 'id') == int(joke_id)) + query, class_ = setup_query("items", False) + query = query.filter(class_.id == int(joke_id)) item = (await session.execute(query)).scalar() if item: - return f''' + return f""" @@ -255,9 +250,9 @@ async def inject_item_html(room_id: str, joke_id: str) -> str: -''' - except Exception: +""" + except Exception: # noqa: S110 pass - return '' + return "" return inject_item_html