diff --git a/museum_map/__main__.py b/museum_map/__main__.py
index 98dcca0..cefaa56 100644
--- a/museum_map/__main__.py
+++ b/museum_map/__main__.py
@@ -1,4 +1,4 @@
-from .cli import cli
+from museum_map.cli import cli
if __name__ == "__main__":
cli()
diff --git a/museum_map/cli/__init__.py b/museum_map/cli/__init__.py
index 168d04c..b6d162a 100644
--- a/museum_map/cli/__init__.py
+++ b/museum_map/cli/__init__.py
@@ -1,275 +1,253 @@
import asyncio
-import click
import logging
import os
import sys
-import yaml
+import click
+import yaml
from cerberus import Validator
-from typing import Union
-
-from .db import db
-from .groups import groups, pipeline_impl as groups_pipeline
-from .server import server
-from .items import items, pipeline_impl as items_pipeline
-from .layout import layout, pipeline_impl as layout_pipeline
-from .search import search, pipeline_impl as search_pipeline
+from museum_map.cli.db import db
+from museum_map.cli.groups import groups
+from museum_map.cli.groups import pipeline_impl as groups_pipeline
+from museum_map.cli.items import items
+from museum_map.cli.items import pipeline_impl as items_pipeline
+from museum_map.cli.layout import layout
+from museum_map.cli.layout import pipeline_impl as layout_pipeline
+from museum_map.cli.search import pipeline_impl as search_pipeline
+from museum_map.cli.search import search
+from museum_map.cli.server import server
-logger = logging.getLogger('scr')
+logger = logging.getLogger("scr")
CONFIG_SCHEMA = {
- 'server': {
- 'type': 'dict',
- 'schema': {
- 'host': {
- 'type': 'string',
- 'default': '127.0.0.1'
- },
- 'port': {
- 'type': 'integer',
- 'default': 6543
- },
+ "server": {
+ "type": "dict",
+ "schema": {
+ "host": {"type": "string", "default": "127.0.0.1"},
+ "port": {"type": "integer", "default": 6543},
+ },
+ "default": {
+ "host": "127.0.0.1",
+ "port": 6543,
},
- 'default': {
- 'host': '127.0.0.1',
- 'port': 6543,
- }
},
- 'db': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'dsn': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "db": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "dsn": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- }
+ },
},
- 'search': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'url': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "search": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "url": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'key': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "key": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- }
+ },
},
- 'data': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'topic_fields': {
- 'type': 'list',
- 'required': True,
- 'minlength': 1,
- 'schema': {
- 'type': 'string',
- 'empty': False,
- }
+ "data": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "topic_fields": {
+ "type": "list",
+ "required": True,
+ "minlength": 1,
+ "schema": {
+ "type": "string",
+ "empty": False,
+ },
},
- 'hierarchy': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'field': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "hierarchy": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "field": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'expansions': {
- 'type': 'list',
- 'required': False,
- 'default': [],
- 'schema': {
- 'type': 'string',
- 'allowed': ['nlp', 'aat'],
- }
- }
- }
+ "expansions": {
+ "type": "list",
+ "required": False,
+ "default": [],
+ "schema": {
+ "type": "string",
+ "allowed": ["nlp", "aat"],
+ },
+ },
+ },
},
- 'year_field': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
- }
- }
+ "year_field": {
+ "type": "string",
+ "required": True,
+ "empty": False,
+ },
+ },
},
- 'images': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'basepath': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "images": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "basepath": {
+ "type": "string",
+ "required": True,
+ "empty": False,
}
- }
+ },
},
- 'layout': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'rooms': {
- 'type': 'list',
- 'required': True,
- 'minlength': 1,
- 'schema': {
- 'type': 'dict',
- 'schema': {
- 'id': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "layout": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "rooms": {
+ "type": "list",
+ "required": True,
+ "minlength": 1,
+ "schema": {
+ "type": "dict",
+ "schema": {
+ "id": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'direction': {
- 'type': 'string',
- 'required': True,
- 'allowed': ['vert', 'horiz'],
+ "direction": {
+ "type": "string",
+ "required": True,
+ "allowed": ["vert", "horiz"],
},
- 'items': {
- 'type': 'integer',
- 'required': True,
- 'min': 1,
+ "items": {
+ "type": "integer",
+ "required": True,
+ "min": 1,
},
- 'splits': {
- 'type': 'integer',
- 'required': True,
- 'min': 1,
+ "splits": {
+ "type": "integer",
+ "required": True,
+ "min": 1,
},
- 'position': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'x': {
- 'type': 'integer',
- 'required': True
- },
- 'y': {
- 'type': 'integer',
- 'required': True
- },
- 'width': {
- 'type': 'integer',
- 'required': True
- },
- 'height': {
- 'type': 'integer',
- 'required': True
- },
- }
- }
- }
- }
+ "position": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "x": {"type": "integer", "required": True},
+ "y": {"type": "integer", "required": True},
+ "width": {"type": "integer", "required": True},
+ "height": {"type": "integer", "required": True},
+ },
+ },
+ },
+ },
}
- }
+ },
},
- 'app': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'base_url': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
- },
- 'intro': {
- 'type': 'string',
- 'required': True,
- 'empty': False
+ "app": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "base_url": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'footer': {
- 'type': 'dict',
- 'schema': {
- 'center': {
- 'type': 'dict',
- 'schema': {
- 'label': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "intro": {"type": "string", "required": True, "empty": False},
+ "footer": {
+ "type": "dict",
+ "schema": {
+ "center": {
+ "type": "dict",
+ "schema": {
+ "label": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'url': {
- 'type': 'string',
- 'required': False,
- }
- }
+ "url": {
+ "type": "string",
+ "required": False,
+ },
+ },
},
- 'right': {
- 'type': 'dict',
- 'schema': {
- 'label': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "right": {
+ "type": "dict",
+ "schema": {
+ "label": {
+ "type": "string",
+ "required": True,
+ "empty": False,
+ },
+ "url": {
+ "type": "string",
+ "required": False,
},
- 'url': {
- 'type': 'string',
- 'required': False,
- }
- }
- }
- }
+ },
+ },
+ },
},
- 'item': {
- 'type': 'dict',
- 'required': True,
- 'schema': {
- 'texts': {
- 'type': 'list',
- 'schema': {
- 'type': 'dict',
- 'schema': {
- 'name': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "item": {
+ "type": "dict",
+ "required": True,
+ "schema": {
+ "texts": {
+ "type": "list",
+ "schema": {
+ "type": "dict",
+ "schema": {
+ "name": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'label': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
- }
- }
- }
+ "label": {
+ "type": "string",
+ "required": True,
+ "empty": False,
+ },
+ },
+ },
},
- 'fields': {
- 'type': 'list',
- 'schema': {
- 'type': 'dict',
- 'schema': {
- 'name': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
+ "fields": {
+ "type": "list",
+ "schema": {
+ "type": "dict",
+ "schema": {
+ "name": {
+ "type": "string",
+ "required": True,
+ "empty": False,
},
- 'label': {
- 'type': 'string',
- 'required': True,
- 'empty': False,
- }
- }
- }
- }
- }
- }
- }
+ "label": {
+ "type": "string",
+ "required": True,
+ "empty": False,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
},
- 'debug': {
- 'type': 'boolean',
- 'default': False,
+ "debug": {
+ "type": "boolean",
+ "default": False,
},
- 'logging': {
- 'type': 'dict'
- }
+ "logging": {"type": "dict"},
}
@@ -287,10 +265,10 @@ def validate_config(config: dict) -> dict:
else:
error_list = []
- def walk_error_tree(err: Union[dict, list], path: str) -> None:
+        def walk_error_tree(err: dict | list, path: tuple[str, ...]) -> None:
if isinstance(err, dict):
for key, value in err.items():
- walk_error_tree(value, path + (str(key), ))
+ walk_error_tree(value, (*path, str(key)))
elif isinstance(err, list):
for sub_err in err:
walk_error_tree(sub_err, path)
@@ -298,13 +276,14 @@ def walk_error_tree(err: Union[dict, list], path: str) -> None:
error_list.append(f'{".".join(path)}: {err}')
walk_error_tree(validator.errors, ())
- error_str = '\n'.join(error_list)
- raise click.ClickException(f'Configuration errors:\n\n{error_str}')
+ error_str = "\n".join(error_list)
+ msg = f"Configuration errors:\n\n{error_str}"
+ raise click.ClickException(msg)
@click.group()
-@click.option('-v', '--verbose', count=True)
-@click.option('-c', '--config', default='production.yml')
+@click.option("-v", "--verbose", count=True)
+@click.option("-c", "--config", default="production.yml")
@click.pass_context
def cli(ctx, verbose, config):
"""Museum Map CLI"""
@@ -313,13 +292,13 @@ def cli(ctx, verbose, config):
logging.basicConfig(level=logging.INFO)
elif verbose > 1:
logging.basicConfig(level=logging.DEBUG)
- logger.debug('Logging set up')
+ logger.debug("Logging set up")
if not os.path.exists(config):
- logger.error(f'Configuration file {config} not found')
+ logger.error(f"Configuration file {config} not found")
sys.exit(1)
with open(config) as in_f:
- config = yaml.load(in_f, Loader=yaml.FullLoader)
- ctx.obj['config'] = validate_config(config)
+ config = yaml.safe_load(in_f)
+ ctx.obj["config"] = validate_config(config)
async def pipeline_impl(config):
@@ -334,7 +313,7 @@ async def pipeline_impl(config):
@click.pass_context
def pipeline(ctx):
"""Run the full processing pipline."""
- asyncio.run(pipeline_impl(ctx.obj['config']))
+ asyncio.run(pipeline_impl(ctx.obj["config"]))
cli.add_command(pipeline)
diff --git a/museum_map/cli/db.py b/museum_map/cli/db.py
index bdf132f..e65d669 100644
--- a/museum_map/cli/db.py
+++ b/museum_map/cli/db.py
@@ -1,13 +1,13 @@
import asyncio
-import click
import json
import os
import shutil
import subprocess
-from museum_map.cli.util import ClickIndeterminate
+import click
-from ..models import create_engine, create_sessionmaker, Base, Item
+from museum_map.cli.util import ClickIndeterminate
+from museum_map.models import Base, Item, create_engine, create_sessionmaker
async def init_impl(config, drop_existing):
@@ -20,21 +20,21 @@ async def init_impl(config, drop_existing):
@click.command()
-@click.option('--drop-existing', is_flag=True, help='Drop any existing tables.')
+@click.option("--drop-existing", is_flag=True, help="Drop any existing tables.")
@click.pass_context
def init(ctx, drop_existing):
"""Initialise the database."""
- asyncio.run(init_impl(ctx.obj['config'], drop_existing))
+ asyncio.run(init_impl(ctx.obj["config"], drop_existing))
async def load_impl(config, source):
"""Load the metadata."""
- progress = ClickIndeterminate('Loading items')
+ progress = ClickIndeterminate("Loading items")
progress.start()
async with create_sessionmaker(config)() as dbsession:
for basepath, _, filenames in os.walk(source):
for filename in filenames:
- if filename.endswith('.json'):
+ if filename.endswith(".json"):
with open(os.path.join(basepath, filename)) as in_f:
dbsession.add(Item(attributes=json.load(in_f)))
await dbsession.commit()
@@ -42,31 +42,51 @@ async def load_impl(config, source):
@click.command()
-@click.argument('source')
+@click.argument("source")
@click.pass_context
def load(ctx, source):
"""Load the metadata."""
- asyncio.run(load_impl(ctx.obj['config'], source))
+ asyncio.run(load_impl(ctx.obj["config"], source))
@click.command()
-@click.argument('source')
-@click.argument('target')
+@click.argument("source")
+@click.argument("target")
@click.pass_context
-def load_images(ctx, source, target):
+def load_images(ctx, source, target): # noqa: ARG001
"""Load and convert images."""
- progress = ClickIndeterminate('Loading images')
+ progress = ClickIndeterminate("Loading images")
progress.start()
for basepath, _, filenames in os.walk(source):
for filename in filenames:
- if filename.endswith('.jpg'):
- image_id = filename[:filename.find('.')]
+ if filename.endswith(".jpg"):
+ image_id = filename[: filename.find(".")]
os.makedirs(os.path.join(target, *image_id), exist_ok=True)
image_source = os.path.join(basepath, filename)
image_target = os.path.join(target, *image_id, filename)
shutil.copy(image_source, image_target)
- subprocess.run(['gm', 'convert', image_source, '-resize', '240x240', image_target.replace('.jpg', '-240.jpg')])
- subprocess.run(['gm', 'convert', image_source, '-resize', '320x320', image_target.replace('.jpg', '-320.jpg')])
+                    subprocess.run(  # noqa: S603, S607
+                        [
+ "gm",
+ "convert",
+ image_source,
+ "-resize",
+ "240x240",
+ image_target.replace(".jpg", "-240.jpg"),
+ ],
+ check=True,
+ )
+                    subprocess.run(  # noqa: S603, S607
+                        [
+ "gm",
+ "convert",
+ image_source,
+ "-resize",
+ "320x320",
+ image_target.replace(".jpg", "-320.jpg"),
+ ],
+ check=True,
+ )
progress.stop()
diff --git a/museum_map/cli/groups.py b/museum_map/cli/groups.py
index 42f381d..1525461 100644
--- a/museum_map/cli/groups.py
+++ b/museum_map/cli/groups.py
@@ -1,56 +1,60 @@
import asyncio
-import click
-import inflection
import math
-
from collections import Counter
+
+import click
+import inflection
from numpy import array
from scipy.spatial.distance import cosine
-from sqlalchemy import and_, or_, func
+from sqlalchemy import and_, func, or_
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
-from .items import apply_aat, apply_nlp
-from .util import ClickIndeterminate
-from ..models import Group, Item, create_sessionmaker
+from museum_map.cli.items import apply_aat, apply_nlp
+from museum_map.cli.util import ClickIndeterminate
+from museum_map.models import Group, Item, create_sessionmaker
async def generate_groups_impl(config):
"""Generate the basic groups."""
async with create_sessionmaker(config)() as dbsession:
- item_stmt = select(Item).filter(Item.group_id == None)
- count_stmt = select(func.count(Item.id)).filter(Item.group_id == None)
+        item_stmt = select(Item).filter(Item.group_id.is_(None))
+        count_stmt = select(func.count(Item.id)).filter(Item.group_id.is_(None))
count = await dbsession.execute(count_stmt)
result = await dbsession.execute(item_stmt)
categories = []
- with click.progressbar(result.scalars(), length=count.scalar_one(), label='Generating potential groups') as progress:
+ with click.progressbar(
+ result.scalars(), length=count.scalar_one(), label="Generating potential groups"
+ ) as progress:
for item in progress:
- for category in item.attributes['_categories']:
+ for category in item.attributes["_categories"]:
categories.append(category.lower())
- counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15]
+ counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15] # noqa: PLR2004
counts.sort(key=lambda c: c[1])
max_groups = len(counts)
- with click.progressbar(length=max_groups, label='Generating groups') as progress:
+ with click.progressbar(length=max_groups, label="Generating groups") as progress:
while counts:
category = counts[0][0]
group_stmt = select(Group).filter(Group.value == category)
result = await dbsession.execute(group_stmt)
group = result.scalars().first()
if group is None:
- group = Group(value=category, label=category[0].upper() + category[1:], split='basic')
+ group = Group(value=category, label=category[0].upper() + category[1:], split="basic")
dbsession.add(group)
result = await dbsession.execute(item_stmt)
for item in result.scalars():
- if category in item.attributes['_categories']:
+ if category in item.attributes["_categories"]:
item.group = group
await dbsession.commit()
categories = []
result = await dbsession.execute(item_stmt)
for item in result.scalars():
- for category in item.attributes['_categories']:
+ for category in item.attributes["_categories"]:
categories.append(category.lower())
old_counts = len(counts)
- counts = [(cat, count) for cat, count in Counter(categories).most_common() if count >= 15]
+ counts = [
+ (cat, count) for cat, count in Counter(categories).most_common() if count >= 15 # noqa: PLR2004
+ ]
counts.sort(key=lambda c: c[1])
progress.update(old_counts - len(counts))
await dbsession.commit()
@@ -60,13 +64,13 @@ async def generate_groups_impl(config):
@click.pass_context
def generate_groups(ctx):
"""Generate the basic groups."""
- asyncio.run(generate_groups_impl(ctx.obj['config']))
+ asyncio.run(generate_groups_impl(ctx.obj["config"]))
def fill_vector(group):
"""Create a full vector from a sparse vector in the database."""
vec = array([0 for _ in range(0, 300)], dtype=float)
- for dim, value in group.attributes['lda_vector']:
+ for dim, value in group.attributes["lda_vector"]:
vec[dim] = value
return vec
@@ -91,12 +95,12 @@ def split_by_similarity(dbsession, group):
if next_item:
sorted_items.append(next_item)
limit = len(group.items) / math.ceil(len(group.items) / 100)
- new_group = Group(value=group.value, label=group.label, parent=group, split='similar')
+ new_group = Group(value=group.value, label=group.label, parent=group, split="similar")
dbsession.add(new_group)
count = 0
for item in sorted_items:
if count > limit:
- new_group = Group(value=group.value, label=group.label, parent=group, split='similar')
+ new_group = Group(value=group.value, label=group.label, parent=group, split="similar")
dbsession.add(new_group)
count = 0
item.group = new_group
@@ -109,7 +113,9 @@ def split_by_attribute(dbsession, group, attr):
for item in group.items:
if attr in item.attributes and item.attributes[attr]:
values.extend(item.attributes[attr])
- categories = [(v, c) for v, c in Counter(values).most_common() if c < len(group.items) * 0.6666 and c >= 15]
+ categories = [
+ (v, c) for v, c in Counter(values).most_common() if c < len(group.items) * 0.6666 and c >= 15 # noqa: PLR2004
+ ]
if categories:
category_values = [v for v, _ in categories]
has_values = 0
@@ -121,15 +127,17 @@ def split_by_attribute(dbsession, group, attr):
break
if found:
has_values = has_values + 1
- if has_values / len(group.items) > 0.9:
+ if has_values / len(group.items) > 0.9: # noqa: PLR2004
categories.reverse()
for category in categories:
- new_group = Group(value=category[0], label=f'{group.label} - {category[0]}', parent=group, split='attribute')
+ new_group = Group(
+ value=category[0], label=f"{group.label} - {category[0]}", parent=group, split="attribute"
+ )
dbsession.add(new_group)
for item in list(group.items):
if category[0] in item.attributes[attr]:
item.group = new_group
- new_group = Group(value=group.label, label=group.label, parent=group, split='attribute')
+ new_group = Group(value=group.label, label=group.label, parent=group, split="attribute")
dbsession.add(new_group)
for item in list(group.items):
item.group = new_group
@@ -144,30 +152,35 @@ def split_by_year(config, dbsession, group):
centuries = []
with_year = 0
for item in group.items:
- if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]:
- years.append(item.attributes[config['data']['year_field']])
+ if config["data"]["year_field"] in item.attributes and item.attributes[config["data"]["year_field"]]:
+ years.append(item.attributes[config["data"]["year_field"]])
with_year = with_year + 1
- if with_year / len(group.items) > 0.95:
+ if with_year / len(group.items) > 0.95: # noqa: PLR2004
common = [(int(v), c) for v, c in Counter(years).most_common()]
start_year = min([c for c, _ in common])
end_year = max([c for c, _ in common])
- if (start_year != end_year):
- year_boundaries = []
- if (end_year - start_year) <= 100 and (end_year - start_year) > 10:
+ if start_year != end_year:
+ if (end_year - start_year) <= 100 and (end_year - start_year) > 10: # noqa: PLR2004
start_decade = math.floor(start_year / 10)
end_decade = math.floor(end_year / 10)
decades = []
for start_year in range(start_decade * 10, (end_decade + 1) * 10, 10):
for item in list(group.items):
- if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]:
- if start_year <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < start_year + 10:
+ if (
+ config["data"]["year_field"] in item.attributes
+ and item.attributes[config["data"]["year_field"]]
+ ):
+ if (
+ start_year <= int(item.attributes[config["data"]["year_field"]])
+ and int(item.attributes[config["data"]["year_field"]]) < start_year + 10
+ ):
if len(decades) == 0 or decades[-1][0][0] != start_year:
decades.append([[start_year], 1])
else:
decades[-1][1] = decades[-1][1] + 1
idx = 0
while idx < len(decades) - 1:
- if decades[idx][1] + decades[idx + 1][1] < 100:
+ if decades[idx][1] + decades[idx + 1][1] < 100: # noqa: PLR2004
decades[idx][0].extend(decades[idx + 1][0])
decades[idx][1] = decades[idx][1] + decades[idx + 1][1]
decades.pop(idx + 1)
@@ -176,37 +189,54 @@ def split_by_year(config, dbsession, group):
for years, _ in decades:
new_group = None
for item in list(group.items):
- if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]:
- if years[0] <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < years[-1] + 10:
+ if (
+ config["data"]["year_field"] in item.attributes
+ and item.attributes[config["data"]["year_field"]]
+ ):
+ if (
+ years[0] <= int(item.attributes[config["data"]["year_field"]])
+ and int(item.attributes[config["data"]["year_field"]]) < years[-1] + 10
+ ):
if new_group is None:
if len(years) == 1:
- label = f'{years[0]}s'
+ label = f"{years[0]}s"
else:
- label = f'{years[0]}s-{years[-1]}s'
- new_group = Group(value=str(start_year), label=f'{group.label} - {label}', parent=group, split='time')
+ label = f"{years[0]}s-{years[-1]}s"
+ new_group = Group(
+ value=str(start_year),
+ label=f"{group.label} - {label}",
+ parent=group,
+ split="time",
+ )
dbsession.add(new_group)
item.group = new_group
if group.items:
- new_group = Group(value=group.label, label=group.label, parent=group, split='time')
+ new_group = Group(value=group.label, label=group.label, parent=group, split="time")
dbsession.add(new_group)
for item in list(group.items):
item.group = new_group
return True
- elif (end_year - start_year) > 100:
+ elif (end_year - start_year) > 100: # noqa: PLR2004
start_century = math.floor(start_year / 100)
end_century = math.floor(end_year / 100)
centuries = []
for start_year in range(start_century * 100, (end_century + 1) * 100, 100):
for item in list(group.items):
- if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]:
- if start_year <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < start_year + 100:
+ if (
+ config["data"]["year_field"] in item.attributes
+ and item.attributes[config["data"]["year_field"]]
+ ):
+ if (
+ start_year <= int(item.attributes[config["data"]["year_field"]])
+ and int(item.attributes[config["data"]["year_field"]]) < start_year + 100
+ ):
if len(centuries) == 0 or centuries[-1][0][0] != start_year:
centuries.append([[start_year], 1])
else:
centuries[-1][1] = centuries[-1][1] + 1
idx = 0
while idx < len(centuries) - 1:
- if centuries[idx][1] + centuries[idx + 1][1] < 100:
+ if centuries[idx][1] + centuries[idx + 1][1] < 100: # noqa: PLR2004
centuries[idx][0].extend(centuries[idx + 1][0])
centuries[idx][1] = centuries[idx][1] + centuries[idx + 1][1]
centuries.pop(idx + 1)
@@ -215,44 +245,55 @@ def split_by_year(config, dbsession, group):
for years, _ in centuries:
new_group = None
for item in list(group.items):
- if config['data']['year_field'] in item.attributes and item.attributes[config['data']['year_field']]:
- if years[0] <= int(item.attributes[config['data']['year_field']]) and int(item.attributes[config['data']['year_field']]) < years[-1] + 100:
+ if (
+ config["data"]["year_field"] in item.attributes
+ and item.attributes[config["data"]["year_field"]]
+ ):
+ if (
+ years[0] <= int(item.attributes[config["data"]["year_field"]])
+ and int(item.attributes[config["data"]["year_field"]]) < years[-1] + 100
+ ):
if new_group is None:
if len(years) == 1:
century = math.floor(years[0] / 100) + 1
- if century % 10 == 1 and century != 11:
- label = f'{century}st'
- elif century % 10 == 2 and century != 12:
- label = f'{century}nd'
- elif century % 10 == 3 and century != 13:
- label = f'{century}rd'
+ if century % 10 == 1 and century != 11: # noqa: PLR2004
+ label = f"{century}st"
+ elif century % 10 == 2 and century != 12: # noqa: PLR2004
+ label = f"{century}nd"
+ elif century % 10 == 3 and century != 13: # noqa: PLR2004
+ label = f"{century}rd"
else:
- label = f'{century}th'
+ label = f"{century}th"
else:
century = math.floor(years[0] / 100) + 1
- if century % 10 == 1 and century != 11:
- start_label = f'{century}st'
- elif century % 10 == 2 and century != 12:
- start_label = f'{century}nd'
- elif century % 10 == 3 and century != 13:
- start_label = f'{century}rd'
+ if century % 10 == 1 and century != 11: # noqa: PLR2004
+ start_label = f"{century}st"
+ elif century % 10 == 2 and century != 12: # noqa: PLR2004
+ start_label = f"{century}nd"
+ elif century % 10 == 3 and century != 13: # noqa: PLR2004
+ start_label = f"{century}rd"
else:
- start_label = f'{century}th'
+ start_label = f"{century}th"
century = math.floor(years[-1] / 100) + 1
- if century % 10 == 1 and century != 11:
- end_label = f'{century}st'
- elif century % 10 == 2 and century != 12:
- end_label = f'{century}nd'
- elif century % 10 == 3 and century != 13:
- end_label = f'{century}rd'
+ if century % 10 == 1 and century != 11: # noqa: PLR2004
+ end_label = f"{century}st"
+ elif century % 10 == 2 and century != 12: # noqa: PLR2004
+ end_label = f"{century}nd"
+ elif century % 10 == 3 and century != 13: # noqa: PLR2004
+ end_label = f"{century}rd"
else:
- end_label = f'{century}th'
- label = f'{start_label}-{end_label}'
- new_group = Group(value=str(start_year), label=f'{group.label} - {label} century', parent=group, split='time')
+ end_label = f"{century}th"
+ label = f"{start_label}-{end_label}"
+ new_group = Group(
+ value=str(start_year),
+ label=f"{group.label} - {label} century",
+ parent=group,
+ split="time",
+ )
dbsession.add(new_group)
item.group = new_group
if group.items:
- new_group = Group(value=group.label, label=group.label, parent=group, split='time')
+ new_group = Group(value=group.label, label=group.label, parent=group, split="time")
dbsession.add(new_group)
for item in list(group.items):
item.group = new_group
@@ -263,7 +304,7 @@ def split_by_year(config, dbsession, group):
async def split_large_groups_impl(config):
"""Split large groups into smaller ones."""
async with create_sessionmaker(config)() as dbsession:
- progress = ClickIndeterminate('Splitting large groups')
+ progress = ClickIndeterminate("Splitting large groups")
progress.start()
splitting = True
stmt = select(Group).options(selectinload(Group.items), selectinload(Group.children))
@@ -272,20 +313,20 @@ async def split_large_groups_impl(config):
result = await dbsession.execute(stmt)
for group in result.scalars():
if len(group.children) == 0:
- if len(group.items) > 120 and len(group.items) < 300:
+ if len(group.items) > 120 and len(group.items) < 300: # noqa: PLR2004
if split_by_year(config, dbsession, group):
splitting = True
else:
split_by_similarity(dbsession, group)
splitting = True
- elif len(group.items) >= 300:
- if split_by_attribute(dbsession, group, 'concepts'):
+ elif len(group.items) >= 300: # noqa: PLR2004
+ if split_by_attribute(dbsession, group, "concepts"):
splitting = True
- elif split_by_attribute(dbsession, group, 'subjects'):
+ elif split_by_attribute(dbsession, group, "subjects"):
splitting = True
- elif split_by_attribute(dbsession, group, 'materials'):
+ elif split_by_attribute(dbsession, group, "materials"):
splitting = True
- elif split_by_attribute(dbsession, group, 'techniques'):
+ elif split_by_attribute(dbsession, group, "techniques"):
splitting = True
elif split_by_year(config, dbsession, group):
splitting = True
@@ -300,13 +341,13 @@ async def split_large_groups_impl(config):
@click.pass_context
def split_large_groups(ctx):
"""Split large groups into smaller ones."""
- asyncio.run(split_large_groups_impl(ctx.obj['config']))
+ asyncio.run(split_large_groups_impl(ctx.obj["config"]))
async def merge_singular_plural_impl(config):
"""Merge singular and plural groups."""
async with create_sessionmaker(config)() as dbsession:
- progress = ClickIndeterminate('Merging singular and plural')
+ progress = ClickIndeterminate("Merging singular and plural")
progress.start()
modifying = True
while modifying:
@@ -314,8 +355,11 @@ async def merge_singular_plural_impl(config):
stmt = select(Group)
result = await dbsession.execute(stmt)
for group in result.scalars():
- stmt = select(Group).filter(and_(Group.value == inflection.singularize(group.value),
- Group.id != group.id)).options(selectinload(Group.items))
+ stmt = (
+ select(Group)
+ .filter(and_(Group.value == inflection.singularize(group.value), Group.id != group.id))
+ .options(selectinload(Group.items))
+ )
result = await dbsession.execute(stmt)
other = result.scalars().first()
if other:
@@ -333,19 +377,21 @@ async def merge_singular_plural_impl(config):
@click.pass_context
def merge_singular_plural(ctx):
"""Merge singular and plural groups."""
- asyncio.run(merge_singular_plural_impl(ctx.obj['config']))
+ asyncio.run(merge_singular_plural_impl(ctx.obj["config"]))
async def add_parent_groups_impl(config):
"""Add any required parent groups."""
async with create_sessionmaker(config)() as dbsession:
- stmt = select(Group).filter(Group.parent_id == None).options(selectinload(Group.parent))
+        stmt = select(Group).filter(Group.parent_id.is_(None)).options(selectinload(Group.parent))
result = await dbsession.execute(stmt)
- stmt = select(func.count(Group.id)).filter(Group.parent_id == None)
+        stmt = select(func.count(Group.id)).filter(Group.parent_id.is_(None))
result_count = await dbsession.execute(stmt)
- with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Adding parent groups') as progress:
+ with click.progressbar(
+ result.scalars(), length=result_count.scalar_one(), label="Adding parent groups"
+ ) as progress:
for group in progress:
- if 'aat' in config['data']['hierarchy']['expansions']:
+ if "aat" in config["data"]["hierarchy"]["expansions"]:
categories = apply_aat(group.value, merge=False)
if categories:
for category_list in categories:
@@ -355,11 +401,13 @@ async def add_parent_groups_impl(config):
result = await dbsession.execute(stmt)
parent_group = result.scalars().first()
if not parent_group:
- parent_group = Group(value=category, label=category[0].upper() + category[1:], split='parent')
+ parent_group = Group(
+ value=category, label=category[0].upper() + category[1:], split="parent"
+ )
dbsession.add(group)
group.parent = parent_group
mapped = True
- group = parent_group
+ group = parent_group # noqa: PLW2901
if group.parent_id:
break
if mapped:
@@ -367,7 +415,9 @@ async def add_parent_groups_impl(config):
else:
mapped = False
for category in apply_nlp(group.value):
- stmt = select(Group).filter(or_(Group.value == category, Group.value == inflection.pluralize(category)))
+ stmt = select(Group).filter(
+ or_(Group.value == category, Group.value == inflection.pluralize(category))
+ )
result = await dbsession.execute(stmt)
parent_group = result.scalars().first()
if parent_group:
@@ -376,13 +426,17 @@ async def add_parent_groups_impl(config):
mapped = True
break
if not mapped:
- if group.value not in ['styles and periods']:
+ if group.value not in ["styles and periods"]:
for category in apply_nlp(group.value):
hierarchies = apply_aat(category, merge=False)
groups = []
for hierarchy in hierarchies:
if group.value not in hierarchy:
- stmt = select(Group).filter(Group.value.in_(hierarchy)).options(selectinload(Group.items))
+ stmt = (
+ select(Group)
+ .filter(Group.value.in_(hierarchy))
+ .options(selectinload(Group.items))
+ )
result = await dbsession.execute(stmt)
for potential_group in result.scalars():
depth = 0
@@ -402,13 +456,13 @@ async def add_parent_groups_impl(config):
@click.pass_context
def add_parent_groups(ctx):
"""Add any required parent groups."""
- asyncio.run(add_parent_groups_impl(ctx.obj['config']))
+ asyncio.run(add_parent_groups_impl(ctx.obj["config"]))
async def prune_single_groups_impl(config):
"""Remove groups that have a single child and no items."""
async with create_sessionmaker(config)() as dbsession:
- progress = ClickIndeterminate('Pruning single groups')
+ progress = ClickIndeterminate("Pruning single groups")
progress.start()
pruning = True
stmt = select(Group).options(selectinload(Group.children), selectinload(Group.items))
@@ -429,13 +483,13 @@ async def prune_single_groups_impl(config):
@click.pass_context
def prune_single_groups(ctx):
"""Remove groups that have a single child and no items."""
- asyncio.run(prune_single_groups_impl(ctx.obj['config']))
+ asyncio.run(prune_single_groups_impl(ctx.obj["config"]))
async def move_inner_items_impl(config):
"""Move items from non-leaf groups into extra leaf groups."""
async with create_sessionmaker(config)() as dbsession:
- progress = ClickIndeterminate('Moving inner items')
+ progress = ClickIndeterminate("Moving inner items")
progress.start()
moving = True
stmt = select(Group).options(selectinload(Group.children), selectinload(Group.items))
@@ -444,7 +498,7 @@ async def move_inner_items_impl(config):
result = await dbsession.execute(stmt)
for group in result.scalars():
if len(group.items) > 0 and len(group.children) > 0:
- sub_group = Group(value=group.value, label=group.label, split='inner')
+ sub_group = Group(value=group.value, label=group.label, split="inner")
dbsession.add(sub_group)
sub_group.parent = group
for item in list(group.items):
@@ -461,7 +515,7 @@ async def move_inner_items_impl(config):
@click.pass_context
def move_inner_items(ctx):
"""Move items from non-leaf groups into extra leaf groups."""
- asyncio.run(move_inner_items_impl(ctx.obj['config']))
+ asyncio.run(move_inner_items_impl(ctx.obj["config"]))
async def pipeline_impl(config):
@@ -478,7 +532,7 @@ async def pipeline_impl(config):
@click.pass_context
def pipeline(ctx):
"""Run the group processing pipeline."""
- asyncio.run(pipeline_impl(ctx.obj['config']))
+ asyncio.run(pipeline_impl(ctx.obj["config"]))
@click.group()
diff --git a/museum_map/cli/items.py b/museum_map/cli/items.py
index b4df5ae..19f7e60 100644
--- a/museum_map/cli/items.py
+++ b/museum_map/cli/items.py
@@ -1,17 +1,17 @@
import asyncio
-import click
import json
import os
+
+import click
import requests
import spacy
-
from gensim import corpora, models
from lxml import etree
from sqlalchemy import func
from sqlalchemy.future import select
-from ..models import create_sessionmaker, Item
-from .util import ClickIndeterminate
+from museum_map.cli.util import ClickIndeterminate
+from museum_map.models import Item, create_sessionmaker
async def tokenise_impl(config):
@@ -20,16 +20,16 @@ async def tokenise_impl(config):
async with create_sessionmaker(config)() as dbsession:
count = await dbsession.execute(select(func.count(Item.id)))
result = await dbsession.execute(select(Item))
- with click.progressbar(result.scalars(), length=count.scalar_one(), label='Tokenising items') as progress:
+ with click.progressbar(result.scalars(), length=count.scalar_one(), label="Tokenising items") as progress:
for item in progress:
- text = ''
- for field in config['data']['topic_fields']:
+ text = ""
+ for field in config["data"]["topic_fields"]:
if field in item.attributes and item.attributes[field].strip():
- if item.attributes[field].strip().endswith('.'):
- text = f'{text} {item.attributes[field].strip()}'
+ if item.attributes[field].strip().endswith("."):
+ text = f"{text} {item.attributes[field].strip()}"
else:
- text = f'{text} {item.attributes[field].strip()}.'
- item.attributes['_tokens'] = [t.lemma_ for t in nlp(text) if not t.pos_ in ['PUNCT', 'SPACE']]
+ text = f"{text} {item.attributes[field].strip()}."
+ item.attributes["_tokens"] = [t.lemma_ for t in nlp(text) if t.pos_ not in ["PUNCT", "SPACE"]]
await dbsession.commit()
@@ -37,65 +37,65 @@ async def tokenise_impl(config):
@click.pass_context
def tokenise(ctx):
"""Generate token lists for each item."""
- asyncio.run(tokenise_impl(ctx.obj['config']))
+ asyncio.run(tokenise_impl(ctx.obj["config"]))
def strip_article(text):
"""Strip any indefinite article from the beginning of the text."""
text = text.strip()
- if text.startswith('a '):
- return text[2:].strip().strip('()[]')
- elif text.startswith('an '):
- return text[3:].strip().strip('()[]')
+ if text.startswith("a "):
+ return text[2:].strip().strip("()[]")
+ elif text.startswith("an "):
+ return text[3:].strip().strip("()[]")
else:
- return text.strip('()[]')
+ return text.strip("()[]")
def apply_nlp(category):
"""Recursively apply the NLP processing rules."""
- if ' ' in category:
- if ' for ' in category:
- idx = category.find(' for ')
+ if " " in category:
+ if " for " in category:
+ idx = category.find(" for ")
prefix = strip_article(category[:idx])
- suffix = strip_article(category[idx + 5:])
- return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix)
- elif '(' in category:
- start = category.find('(')
- end = category.find(')')
- outer = strip_article((category[:start] + ' ' + category[end + 1:]))
- inner = strip_article(category[start + 1:end])
- return [outer, inner] + apply_nlp(outer) + apply_nlp(inner)
- elif ' with ' in category:
- idx = category.find(' with ')
+ suffix = strip_article(category[idx + 5 :])
+ return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)]
+ elif "(" in category:
+ start = category.find("(")
+ end = category.find(")")
+ outer = strip_article(category[:start] + " " + category[end + 1 :])
+ inner = strip_article(category[start + 1 : end])
+ return [outer, inner, *apply_nlp(outer), *apply_nlp(inner)]
+ elif " with " in category:
+ idx = category.find(" with ")
prefix = strip_article(category[:idx])
- suffix = strip_article(category[idx + 6:])
- return [prefix, suffix] + apply_nlp(prefix) + apply_nlp(suffix)
- elif ' of ' in category:
- idx = category.find(' of ')
+ suffix = strip_article(category[idx + 6 :])
+ return [prefix, suffix, *apply_nlp(prefix), *apply_nlp(suffix)]
+ elif " of " in category:
+ idx = category.find(" of ")
prefix = strip_article(category[:idx])
- suffix = strip_article(category[idx + 4:])
- if prefix in ['pair', 'copy', 'base', 'fragments', 'figure', 'copy']:
- return [suffix] + apply_nlp(suffix)
+ suffix = strip_article(category[idx + 4 :])
+ if prefix in ["pair", "copy", "base", "fragments", "figure", "copy"]:
+ return [suffix, *apply_nlp(suffix)]
else:
- return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix)
- elif ' from ' in category:
- idx = category.find(' from ')
+ return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)]
+ elif " from " in category:
+ idx = category.find(" from ")
prefix = strip_article(category[:idx])
- suffix = strip_article(category[idx + 4:])
- if prefix in ['pair', 'copy', 'base', 'fragments', 'figure', 'copy']:
- return [suffix] + apply_nlp(suffix)
+ suffix = strip_article(category[idx + 6 :])
+ if prefix in ["pair", "copy", "base", "fragments", "figure", "copy"]:
+ return [suffix, *apply_nlp(suffix)]
else:
- return [suffix, prefix] + apply_nlp(suffix) + apply_nlp(prefix)
- elif '&' in category:
- categories = [strip_article(c) for c in category.split('&')]
+ return [suffix, prefix, *apply_nlp(suffix), *apply_nlp(prefix)]
+ elif "&" in category:
+ categories = [strip_article(c) for c in category.split("&")]
for cat in list(categories):
categories = categories + apply_nlp(cat)
return categories
- elif ' and ' in category or ',' in category:
+ elif " and " in category or "," in category:
categories = []
- while ' and ' in category or ',' in category:
- and_idx = category.find(' and ')
- comma_idx = category.find(',')
+ while " and " in category or "," in category:
+ and_idx = category.find(" and ")
+ comma_idx = category.find(",")
if and_idx >= 0 and comma_idx >= 0:
idx = min(and_idx, comma_idx)
elif and_idx >= 0:
@@ -106,65 +106,69 @@ def apply_nlp(category):
idx = -1
if idx >= 0:
categories.append(strip_article(category[:idx]))
- if category[idx] == ',':
- category = category[idx + 1:]
+ if category[idx] == ",":
+ category = category[idx + 1 :]
else:
- category = category[idx + 5:]
- if category.strip().strip('()[]'):
- categories.append(strip_article(category.strip().strip('()[]')))
+ category = category[idx + 5 :]
+ if category.strip().strip("()[]"):
+ categories.append(strip_article(category.strip().strip("()[]")))
for cat in list(categories):
categories = categories + apply_nlp(cat)
return categories
- elif ' or ' in category:
+ elif " or " in category:
categories = []
- while ' or ' in category:
- idx = category.find(' or ')
+ while " or " in category:
+ idx = category.find(" or ")
if idx >= 0:
categories.append(strip_article(category[:idx]))
- category = category[idx + 4:].strip().strip('()[]')
- if category.strip().strip('()[]'):
+ category = category[idx + 4 :].strip().strip("()[]")
+ if category.strip().strip("()[]"):
categories.append(strip_article(category))
for cat in list(categories):
categories = categories + apply_nlp(cat)
return categories
else:
categories = category.split()
- return [' '.join(categories[-idx:]) for idx in range(len(categories) - 1, 0, -1)]
+ return [" ".join(categories[-idx:]) for idx in range(len(categories) - 1, 0, -1)]
else:
return []
-def apply_aat(category, merge=True):
+def apply_aat(category, merge=True): # noqa: FBT002
"""Expand the category using the AAT."""
- if os.path.exists('aat.json'):
- with open('aat.json') as in_f:
+ if os.path.exists("aat.json"):
+ with open("aat.json") as in_f:
cache = json.load(in_f)
else:
cache = {}
if category not in cache:
cache[category] = []
- response = requests.get('http://vocabsservices.getty.edu/AATService.asmx/AATGetTermMatch',
- params=[('term', f'"{category}"'),
- ('logop', 'and'),
- ('notes', '')])
- if response.status_code == 200:
- subjects = etree.fromstring(response.content).xpath('Subject/Subject_ID/text()')
+ response = requests.get(
+ "http://vocabsservices.getty.edu/AATService.asmx/AATGetTermMatch",
+ params=[("term", f'"{category}"'), ("logop", "and"), ("notes", "")],
+ timeout=300,
+ )
+ if response.status_code == 200: # noqa: PLR2004
+ subjects = etree.fromstring(response.content).xpath("Subject/Subject_ID/text()") # noqa: S320
hierarchies = []
for subject in subjects:
- response2 = requests.get('http://vocabsservices.getty.edu/AATService.asmx/AATGetSubject',
- params=[('subjectID', subject)])
- if response.status_code == 200:
- hierarchy_text = etree.fromstring(response2.content).xpath('Subject/Hierarchy/text()')
+ response2 = requests.get(
+ "http://vocabsservices.getty.edu/AATService.asmx/AATGetSubject",
+ params=[("subjectID", subject)],
+ timeout=300,
+ )
+ if response2.status_code == 200: # noqa: PLR2004
+ hierarchy_text = etree.fromstring(response2.content).xpath("Subject/Hierarchy/text()") # noqa: S320
if hierarchy_text:
hierarchy = []
- for entry in [h.strip() for h in hierarchy_text[0].split('|') if '<' not in h]:
- entry = entry.lower()
- if '(' in entry:
- entry = entry[:entry.find('(')].strip()
- if entry.endswith(' facet'):
- entry = entry[:entry.find(' facet')].strip()
- if entry.endswith(' genres'):
- entry = entry[:entry.find(' genres')].strip()
+ for entry in [h.strip() for h in hierarchy_text[0].split("|") if "<" not in h]:
+ entry = entry.lower() # noqa: PLW2901
+ if "(" in entry:
+ entry = entry[: entry.find("(")].strip() # noqa: PLW2901
+ if entry.endswith(" facet"):
+ entry = entry[: entry.find(" facet")].strip() # noqa: PLW2901
+ if entry.endswith(" genres"):
+ entry = entry[: entry.find(" genres")].strip() # noqa: PLW2901
if entry not in hierarchy:
hierarchy.append(entry)
hierarchies.append(hierarchy)
@@ -172,11 +176,11 @@ def apply_aat(category, merge=True):
for hierarchy in hierarchies:
for start in range(0, len(hierarchy)):
if hierarchy[start] not in cache:
- if hierarchy[start + 1:]:
- cache[hierarchy[start]] = [hierarchy[start + 1:]]
+ if hierarchy[start + 1 :]:
+ cache[hierarchy[start]] = [hierarchy[start + 1 :]]
else:
cache[hierarchy[start]] = []
- with open('aat.json', 'w') as out_f:
+ with open("aat.json", "w") as out_f:
json.dump(cache, out_f)
if merge:
if len(cache[category]) > 1:
@@ -204,16 +208,16 @@ async def expand_categories_impl(config):
async with create_sessionmaker(config)() as dbsession:
count = await dbsession.execute(select(func.count(Item.id)))
result = await dbsession.execute(select(Item))
- with click.progressbar(result.scalars(), length=count.scalar_one(), label='Expanding categories') as progress:
+ with click.progressbar(result.scalars(), length=count.scalar_one(), label="Expanding categories") as progress:
for item in progress:
- categories = [c.lower() for c in item.attributes[config['data']['hierarchy']['field']]]
- if 'nlp' in config['data']['hierarchy']['expansions']:
- for category in item.attributes[config['data']['hierarchy']['field']]:
+ categories = [c.lower() for c in item.attributes[config["data"]["hierarchy"]["field"]]]
+ if "nlp" in config["data"]["hierarchy"]["expansions"]:
+ for category in item.attributes[config["data"]["hierarchy"]["field"]]:
categories = categories + apply_nlp(category.lower())
- if 'aat' in config['data']['hierarchy']['expansions']:
+ if "aat" in config["data"]["hierarchy"]["expansions"]:
for category in list(categories):
categories = categories + apply_aat(category)
- item.attributes['_categories'] = categories
+ item.attributes["_categories"] = categories
await dbsession.commit()
@@ -221,41 +225,44 @@ async def expand_categories_impl(config):
@click.pass_context
def expand_categories(ctx):
"""Expand the object categories."""
- asyncio.run(expand_categories_impl(ctx.obj['config']))
+ asyncio.run(expand_categories_impl(ctx.obj["config"]))
async def generate_topic_vectors_impl(config):
"""Generate topic vectors for all items."""
async with create_sessionmaker(config)() as dbsession:
- async def texts(dictionary=None, label=''):
+
+ async def texts(dictionary=None, label=""):
count = await dbsession.execute(select(func.count(Item.id)))
result = await dbsession.execute(select(Item))
with click.progressbar(result.scalars(), length=count.scalar_one(), label=label) as progress:
for item in progress:
- if '_tokens' in item.attributes:
+ if "_tokens" in item.attributes:
if dictionary:
- yield dictionary.doc2bow(item.attributes['_tokens'])
+ yield dictionary.doc2bow(item.attributes["_tokens"])
else:
- yield item.attributes['_tokens']
+ yield item.attributes["_tokens"]
dictionary = corpora.Dictionary()
- async for tokens in texts(label='Generating dictionary'):
+ async for tokens in texts(label="Generating dictionary"):
dictionary.add_documents([tokens])
dictionary.filter_extremes(keep_n=None)
corpus = []
- async for bow in texts(dictionary=dictionary, label='Generating corpus'):
+ async for bow in texts(dictionary=dictionary, label="Generating corpus"):
corpus.append(bow)
- waiting = ClickIndeterminate('Generating model')
+ waiting = ClickIndeterminate("Generating model")
waiting.start()
model = models.LdaModel(corpus, num_topics=300, id2word=dictionary, update_every=0)
waiting.stop()
count = await dbsession.execute(select(func.count(Item.id)))
result = await dbsession.execute(select(Item))
- with click.progressbar(result.scalars(), length=count.scalar_one(), label='Generating topic vectors') as progress:
+ with click.progressbar(
+ result.scalars(), length=count.scalar_one(), label="Generating topic vectors"
+ ) as progress:
for item in progress:
- if '_tokens' in item.attributes:
- vec = model[dictionary.doc2bow(item.attributes['_tokens'])]
- item.attributes['lda_vector'] = [(wid, float(prob)) for wid, prob in vec]
+ if "_tokens" in item.attributes:
+ vec = model[dictionary.doc2bow(item.attributes["_tokens"])]
+ item.attributes["lda_vector"] = [(wid, float(prob)) for wid, prob in vec]
await dbsession.commit()
@@ -263,7 +270,7 @@ async def texts(dictionary=None, label=''):
@click.pass_context
def generate_topic_vectors(ctx):
"""Generate topic vectors for all items."""
- asyncio.run(generate_topic_vectors_impl(ctx.obj['config']))
+ asyncio.run(generate_topic_vectors_impl(ctx.obj["config"]))
async def pipeline_impl(config):
@@ -272,11 +279,13 @@ async def pipeline_impl(config):
await tokenise_impl(config)
await generate_topic_vectors_impl(config)
+
@click.command()
@click.pass_context
def pipeline(ctx):
"""Run the items processing pipeline."""
- asyncio.run(pipeline_impl(ctx.obj['config']))
+ asyncio.run(pipeline_impl(ctx.obj["config"]))
+
@click.group()
def items():
diff --git a/museum_map/cli/layout.py b/museum_map/cli/layout.py
index d085e2b..32a224b 100644
--- a/museum_map/cli/layout.py
+++ b/museum_map/cli/layout.py
@@ -1,17 +1,17 @@
import asyncio
-import click
import math
-
from copy import deepcopy
+from random import choice
+
+import click
from inflection import pluralize
-from random import sample, choice
from scipy.spatial.distance import cosine
-from sqlalchemy import func, delete
+from sqlalchemy import delete, func
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
-from .groups import fill_vector
-from ..models import Item, Group, Room, Floor, FloorTopic, create_sessionmaker
+from museum_map.cli.groups import fill_vector
+from museum_map.models import Floor, FloorTopic, Group, Item, Room, create_sessionmaker
async def count_items(dbsession, group):
@@ -34,7 +34,9 @@ def walk(node):
for child in node.children:
walk(child)
- stmt = select(Group).options(selectinload(Group.parent), selectinload(Group.children), selectinload(Group.items), selectinload(Group.room))
+ stmt = select(Group).options(
+ selectinload(Group.parent), selectinload(Group.children), selectinload(Group.items), selectinload(Group.room)
+ )
result = await dbsession.execute(stmt)
for root in result.scalars():
if root.parent is None:
@@ -45,29 +47,29 @@ def walk(node):
def pluralize_label(label):
"""Pluralise the label."""
- if ' ' in label:
- if ' - ' in label:
- parts = label.split(' - ')
+ if " " in label:
+ if " - " in label:
+ parts = label.split(" - ")
parts[0] = pluralize_label(parts[0])
- label = ' - '.join(parts)
- elif ' of ' in label:
- part = label[:label.find(' of ')]
+ label = " - ".join(parts)
+ elif " of " in label:
+ part = label[: label.find(" of ")]
label = f'{pluralize_label(part)}{label[label.find(" of "):]}'
- elif ' for ' in label:
- part = label[:label.find(' for ')]
+ elif " for " in label:
+ part = label[: label.find(" for ")]
label = f'{pluralize_label(part)}{label[label.find(" for "):]}'
- elif ' and ' in label:
- part1 = label[:label.find(' and ')]
- part2 = label[label.find(' and ') + 5:]
- label = f'{pluralize_label(part1)} and {pluralize_label(part2)}'
- elif ' or ' in label:
- part1 = label[:label.find(' or ')]
- part2 = label[label.find(' or ') + 4:]
- label = f'{pluralize_label(part1)} or {pluralize_label(part2)}'
+ elif " and " in label:
+ part1 = label[: label.find(" and ")]
+ part2 = label[label.find(" and ") + 5 :]
+ label = f"{pluralize_label(part1)} and {pluralize_label(part2)}"
+ elif " or " in label:
+ part1 = label[: label.find(" or ")]
+ part2 = label[label.find(" or ") + 4 :]
+ label = f"{pluralize_label(part1)} or {pluralize_label(part2)}"
else:
- parts = label.split(' ')
+ parts = label.split(" ")
parts[-1] = pluralize(parts[-1])
- label = ' '.join(parts)
+ label = " ".join(parts)
else:
label = pluralize(label)
return label
@@ -79,16 +81,20 @@ async def generate_rooms(dbsession, floor, nr, room_ids, rooms, assigned):
rid = room_ids.pop()
room = rooms[rid]
splits_left = 1 # room['max_splits']
- items_left = room['items']
+ items_left = room["items"]
for group in await get_assignable_groups(dbsession, assigned):
if items_left >= len(group.items) and splits_left > 0:
label = pluralize_label(group.label)
- dbsession.add(Room(number=f'{floor.level}.{nr}',
- label=label,
- group=group,
- floor=floor,
- items=group.items,
- position=room['position']))
+ dbsession.add(
+ Room(
+ number=f"{floor.level}.{nr}",
+ label=label,
+ group=group,
+ floor=floor,
+ items=group.items,
+ position=room["position"],
+ )
+ )
items_left = items_left - len(group.items)
splits_left = splits_left - 1
assigned.append(group.id)
@@ -100,19 +106,19 @@ async def generate_rooms(dbsession, floor, nr, room_ids, rooms, assigned):
async def generate_structure_impl(config):
"""Generate the floors and rooms structure."""
async with create_sessionmaker(config)() as dbsession:
- room_ids = [room['id'] for room in config['layout']['rooms']]
+ room_ids = [room["id"] for room in config["layout"]["rooms"]]
room_ids.reverse()
- rooms = dict([(room['id'], room) for room in config['layout']['rooms']])
+ rooms = {room["id"]: room for room in config["layout"]["rooms"]}
assigned = []
assignable = await get_assignable_groups(dbsession, assigned)
old_len = len(assignable)
floor_nr = -1
- progress = click.progressbar(length=len(assignable), label='Generating layout')
+ progress = click.progressbar(length=len(assignable), label="Generating layout")
progress.update(0)
while assignable:
floor_nr = floor_nr + 1
- floor = Floor(label = f'Floor {floor_nr}', level=floor_nr)
+ floor = Floor(label=f"Floor {floor_nr}", level=floor_nr)
dbsession.add(floor)
await generate_rooms(dbsession, floor, 1, deepcopy(room_ids), rooms, assigned)
assignable = await get_assignable_groups(dbsession, assigned)
@@ -125,12 +131,12 @@ async def generate_structure_impl(config):
@click.pass_context
def generate_structure(ctx):
"""Generate the floors and rooms structure."""
- asyncio.run(generate_structure_impl(ctx.obj['config']))
+ asyncio.run(generate_structure_impl(ctx.obj["config"]))
def get_basic_group(group):
"""Find the first basic group"""
- if group.split == 'basic':
+ if group.split == "basic":
return group
else:
return get_basic_group(group.parent)
@@ -147,7 +153,7 @@ def depth(group):
def get_ancestors(group):
"""Determine all the ancestors of a group."""
if group.parent:
- return [group.parent] + get_ancestors(group.parent)
+ return [group.parent, *get_ancestors(group.parent)]
else:
return []
@@ -156,27 +162,35 @@ async def summarise_rooms(dbsession):
"""Generate the room summaries."""
rooms = await dbsession.execute(select(Room).options(selectinload(Room.items)))
rooms_count = await dbsession.execute(select(func.count(Room.id)))
- with click.progressbar(rooms.scalars(), length=rooms_count.scalar_one(), label='Generating room summaries') as progress:
+ with click.progressbar(
+ rooms.scalars(), length=rooms_count.scalar_one(), label="Generating room summaries"
+ ) as progress:
for room in progress:
- room.sample = choice(room.items)
+ room.sample = choice(room.items) # noqa: S311
dbsession.add(room)
await dbsession.commit()
async def summarise_floors(dbsession):
"""Generate the floor summaries."""
- floors = await dbsession.execute(select(Floor).options(selectinload(Floor.topics), selectinload(Floor.rooms), selectinload(Floor.samples)))
+ floors = await dbsession.execute(
+ select(Floor).options(selectinload(Floor.topics), selectinload(Floor.rooms), selectinload(Floor.samples))
+ )
floors_count = await dbsession.execute(select(func.count(Floor.id)))
- with click.progressbar(floors.scalars(), length=floors_count.scalar_one(), label='Generating floor summaries') as progress:
+ with click.progressbar(
+ floors.scalars(), length=floors_count.scalar_one(), label="Generating floor summaries"
+ ) as progress:
for floor in progress:
floor_groups = {}
if len(floor.topics) == 0:
- groups = await dbsession.execute(select(Group).join(Group.room).filter(Room.floor_id == floor.id).options(selectinload(Group.items)))
+ groups = await dbsession.execute(
+ select(Group).join(Group.room).filter(Room.floor_id == floor.id).options(selectinload(Group.items))
+ )
for group in groups.scalars():
size = await count_items(dbsession, group)
- while group.split in ['time', 'similar', 'attribute', 'inner']:
+ while group.split in ["time", "similar", "attribute", "inner"]:
parent_result = await dbsession.execute(select(Group).filter(Group.id == group.parent_id))
- group = parent_result.scalar_one()
+ group = parent_result.scalar_one() # noqa: PLW2901
if group in floor_groups:
floor_groups[group] = floor_groups[group] + size
else:
@@ -188,9 +202,11 @@ async def summarise_floors(dbsession):
for group, size in group_sizes:
sub_total = sub_total + size
dbsession.add(FloorTopic(label=pluralize_label(group.label), group=group, floor=floor, size=size))
- if sub_total / total > 0.66666:
+ if sub_total / total > 0.66666: # noqa: PLR2004
break
- items_result = await dbsession.execute(select(Item).filter(Item.room_id.in_([room.id for room in floor.rooms])))
+ items_result = await dbsession.execute(
+ select(Item).filter(Item.room_id.in_([room.id for room in floor.rooms]))
+ )
items = list(items_result.scalars())
floor.samples = [items[idx] for idx in range(0, len(items), math.floor(len(items) / 15))]
await dbsession.commit()
@@ -209,7 +225,7 @@ async def generate_summaries_impl(config):
@click.pass_context
def generate_summaries(ctx):
"""Generate the floor and room summaries"""
- asyncio.run(generate_summaries_impl(ctx.obj['config']))
+ asyncio.run(generate_summaries_impl(ctx.obj["config"]))
async def order_items_impl(config):
@@ -218,7 +234,9 @@ async def order_items_impl(config):
result = await dbsession.execute(stmt)
stmt_count = select(func.count(Room.id))
result_count = await dbsession.execute(stmt_count)
- with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Ordering items in rooms') as progress:
+ with click.progressbar(
+ result.scalars(), length=result_count.scalar_one(), label="Ordering items in rooms"
+ ) as progress:
for room in progress:
vectors = {}
sorted_items = []
@@ -247,7 +265,7 @@ async def order_items_impl(config):
@click.pass_context
def order_items(ctx):
"""Order the items in each room"""
- asyncio.run(order_items_impl(ctx.obj['config']))
+ asyncio.run(order_items_impl(ctx.obj["config"]))
async def pipeline_impl(config):
@@ -261,7 +279,7 @@ async def pipeline_impl(config):
@click.pass_context
def pipeline(ctx):
"""Run the layout pipeline."""
- asyncio.run(pipeline_impl(ctx.obj['config']))
+ asyncio.run(pipeline_impl(ctx.obj["config"]))
@click.group()
diff --git a/museum_map/cli/search.py b/museum_map/cli/search.py
index 79460e9..d2626f7 100644
--- a/museum_map/cli/search.py
+++ b/museum_map/cli/search.py
@@ -1,6 +1,6 @@
import asyncio
-import click
+import click
from meilisearch_python_async import Client
from meilisearch_python_async.models.settings import Faceting
from meilisearch_python_async.task import wait_for_task
@@ -8,46 +8,48 @@
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
-from .util import ClickIndeterminate
-from ..models import Room, create_sessionmaker
+from museum_map.cli.util import ClickIndeterminate
+from museum_map.models import Room, create_sessionmaker
async def index_impl(config):
"""The actual indexing implementation."""
async with create_sessionmaker(config)() as dbsession:
- async with Client(config['search']['url'], config['search']['key']) as client:
+ async with Client(config["search"]["url"], config["search"]["key"]) as client:
try:
- index = await client.get_index('items')
+ index = await client.get_index("items")
task = await index.delete()
await wait_for_task(client, task.task_uid, timeout_in_ms=None)
- except Exception:
+ except Exception: # noqa: S110
pass
- items_idx = await client.create_index('items', primary_key='mmap_id')
+ items_idx = await client.create_index("items", primary_key="mmap_id")
stmt = select(Room).options(selectinload(Room.items))
result = await dbsession.execute(stmt)
stmt_count = select(func.count(Room.id))
result_count = await dbsession.execute(stmt_count)
docs = []
- with click.progressbar(result.scalars(), length=result_count.scalar_one(), label='Generating rooms documents') as progress:
+ with click.progressbar(
+ result.scalars(), length=result_count.scalar_one(), label="Generating rooms documents"
+ ) as progress:
for room in progress:
for item in room.items:
doc = {
- 'mmap_id': item.id,
- 'mmap_room': room.id,
- 'mmap_floor': room.floor_id,
+ "mmap_id": item.id,
+ "mmap_room": room.id,
+ "mmap_floor": room.floor_id,
}
doc.update(item.attributes)
docs.append(doc)
tasks = await items_idx.add_documents_in_batches(docs)
- with click.progressbar(tasks, label='Waiting for indexing to complete') as progress:
+ with click.progressbar(tasks, label="Waiting for indexing to complete") as progress:
for task in progress:
await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000)
- progress = ClickIndeterminate('Updating filterable attributes')
+ progress = ClickIndeterminate("Updating filterable attributes")
progress.start()
- task = await items_idx.update_filterable_attributes(['mmap_room', 'mmap_floor'])
+ task = await items_idx.update_filterable_attributes(["mmap_room", "mmap_floor"])
await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000)
progress.stop()
- progress = ClickIndeterminate('Updating faceting settings')
+ progress = ClickIndeterminate("Updating faceting settings")
progress.start()
task = await items_idx.update_faceting(Faceting(max_values_per_facet=1000))
await wait_for_task(client, task.task_uid, timeout_in_ms=None, interval_in_ms=1000)
@@ -58,7 +60,7 @@ async def index_impl(config):
@click.pass_context
def index(ctx):
"""Index the data"""
- asyncio.run(index_impl(ctx.obj['config']))
+ asyncio.run(index_impl(ctx.obj["config"]))
async def pipeline_impl(config):
diff --git a/museum_map/cli/server.py b/museum_map/cli/server.py
index 643e7aa..199198d 100644
--- a/museum_map/cli/server.py
+++ b/museum_map/cli/server.py
@@ -1,19 +1,17 @@
-import click
-
from importlib import resources
-from sqlalchemy.ext.asyncio import create_async_engine
+
+import click
from tornado.ioloop import IOLoop
from tornado.web import Application, StaticFileHandler
-from ..models import create_engine
-from ..server.handlers import (
+from museum_map.server.handlers import (
APICollectionHandler,
APIConfigHandler,
APIItemHandler,
APIPickHandler,
- FrontendHandler,
- APIStatusHandler,
APISearchHandler,
+ APIStatusHandler,
+ FrontendHandler,
create_inject_item_html,
)
@@ -21,29 +19,29 @@
@click.command()
@click.pass_context
def run(ctx):
- config = ctx.obj['config']
+ config = ctx.obj["config"]
app = Application(
[
- ('/api', APIStatusHandler),
- ('/api/picks/([a-z\-]+)', APIPickHandler),
- ('/api/config/all', APIConfigHandler, {'config': config}),
- ('/api/search', APISearchHandler),
- ('/api/([a-z\-]+)', APICollectionHandler),
- ('/api/([a-z\-]+)/([0-9]+)', APIItemHandler),
- ('/images/(.*)', StaticFileHandler, {'path': config['images']['basepath']}),
+ ("/api", APIStatusHandler),
+ (r"/api/picks/([a-z\-]+)", APIPickHandler),
+ ("/api/config/all", APIConfigHandler, {"config": config}),
+ ("/api/search", APISearchHandler),
+ (r"/api/([a-z\-]+)", APICollectionHandler),
+ (r"/api/([a-z\-]+)/([0-9]+)", APIItemHandler),
+ ("/images/(.*)", StaticFileHandler, {"path": config["images"]["basepath"]}),
(
- '/(.*)',
+ "/(.*)",
FrontendHandler,
{
- 'base': resources.files('museum_map') / 'server' / 'frontend' / 'public',
- 'html_injectors': {r'room/([0-9]+)/([0-9]+)': create_inject_item_html(config)},
+ "base": resources.files("museum_map") / "server" / "frontend" / "public",
+ "html_injectors": {r"room/([0-9]+)/([0-9]+)": create_inject_item_html(config)},
},
),
],
autoreload=True,
config=config,
)
- app.listen(config['server']['port'], address=config['server']['host'])
+ app.listen(config["server"]["port"], address=config["server"]["host"])
IOLoop.current().start()
diff --git a/museum_map/cli/util.py b/museum_map/cli/util.py
index f8e893c..1fc3cc8 100644
--- a/museum_map/cli/util.py
+++ b/museum_map/cli/util.py
@@ -1,9 +1,9 @@
"""Utility functionality for the cli."""
-import click
-
from threading import Thread
from time import sleep
+import click
+
class ClickIndeterminate(Thread):
"""A thread that shows a indeterminate busy animation using the cli."""
@@ -15,16 +15,16 @@ def __init__(self, label):
def run(self):
"""Run the animation sequence."""
- anim = ['\u28fe', '\u28f7', '\u28ef', '\u28df', '\u287f', '\u28bf', '\u28fb', '\u28fd']
+ anim = ["\u28fe", "\u28f7", "\u28ef", "\u28df", "\u287f", "\u28bf", "\u28fb", "\u28fd"]
anim.reverse()
self._active = True
- click.echo(f'{self._label} ', nl=False)
+ click.echo(f"{self._label} ", nl=False)
while self._active:
- click.echo(f'\b{anim[-1]}', nl=False)
+ click.echo(f"\b{anim[-1]}", nl=False)
anim.insert(0, anim.pop())
sleep(0.15)
def stop(self):
"""Stop the animation sequence."""
self._active = False
- click.echo(f'\b\u2713')
+ click.echo("\b\u2713")
diff --git a/museum_map/models/__init__.py b/museum_map/models/__init__.py
index 3aacbad..7ca6020 100644
--- a/museum_map/models/__init__.py
+++ b/museum_map/models/__init__.py
@@ -1,30 +1,32 @@
"""Database models."""
-from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine, AsyncSession
-from sqlalchemy.orm import sessionmaker
+from collections.abc import Callable
-from typing import Callable
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
from .base import Base # noqa
-from .item import Item # noqa
+from .floor import Floor, FloorTopic # noqa
from .group import Group # noqa
+from .item import Item # noqa
from .room import Room # noqa
-from .floor import Floor, FloorTopic # noqa
-
engine = None
+
def create_engine(config) -> AsyncEngine:
"""Get a new singleton DB engine."""
- global engine
+ global engine # noqa: PLW0603
if engine is None:
- engine = create_async_engine(config['db']['dsn'])
+ engine = create_async_engine(config["db"]["dsn"])
return engine
async_sessionmaker = None
+
+
def create_sessionmaker(config) -> Callable[[], AsyncSession]:
"""Get a new singleton DB session maker."""
- global async_sessionmaker
+ global async_sessionmaker # noqa: PLW0603
if async_sessionmaker is None:
async_sessionmaker = sessionmaker(create_engine(config), expire_on_commit=False, class_=AsyncSession)
return async_sessionmaker
diff --git a/museum_map/models/base.py b/museum_map/models/base.py
index 6e8891e..aa6409a 100644
--- a/museum_map/models/base.py
+++ b/museum_map/models/base.py
@@ -2,11 +2,11 @@
from sqlalchemy.ext.declarative import declarative_base
NAMING_CONVENTION = {
- "ix": 'ix_%(column_0_label)s',
+ "ix": "ix_%(column_0_label)s",
"uq": "uq_%(table_name)s_%(column_0_name)s",
"ck": "ck_%(table_name)s_%(constraint_name)s",
"fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
- "pk": "pk_%(table_name)s"
+ "pk": "pk_%(table_name)s",
}
metadata = MetaData(naming_convention=NAMING_CONVENTION)
diff --git a/museum_map/models/floor.py b/museum_map/models/floor.py
index a001e3b..2034885 100644
--- a/museum_map/models/floor.py
+++ b/museum_map/models/floor.py
@@ -1,102 +1,67 @@
-from sqlalchemy import (Table, Column, Integer, Unicode, UnicodeText, ForeignKey, Index)
+from sqlalchemy import Column, ForeignKey, Index, Integer, Table, Unicode
from sqlalchemy.orm import relationship
-from sqlalchemy_json import NestedMutableJson
-from .base import Base
+from museum_map.models.base import Base
-
-floors_items = Table('floors_items', Base.metadata,
- Column('floor_id', Integer, ForeignKey('floors.id')),
- Column('item_id', Integer, ForeignKey('items.id')))
+floors_items = Table(
+ "floors_items",
+ Base.metadata,
+ Column("floor_id", Integer, ForeignKey("floors.id")),
+ Column("item_id", Integer, ForeignKey("items.id")),
+)
class Floor(Base):
+ __tablename__ = "floors"
- __tablename__ = 'floors'
-
- id = Column(Integer, primary_key=True)
+ id = Column(Integer, primary_key=True) # noqa: A003
label = Column(Unicode(255))
level = Column(Integer)
- rooms = relationship('Room', back_populates='floor')
- samples = relationship('Item', secondary=floors_items)
- topics = relationship('FloorTopic', back_populates='floor')
+ rooms = relationship("Room", back_populates="floor")
+ samples = relationship("Item", secondary=floors_items)
+ topics = relationship("FloorTopic", back_populates="floor")
def as_jsonapi(self):
return {
- 'type': 'floors',
- 'id': str(self.id),
- 'attributes': {
- 'label': self.label,
- 'level': self.level,
+ "type": "floors",
+ "id": str(self.id),
+ "attributes": {
+ "label": self.label,
+ "level": self.level,
+ },
+ "relationships": {
+ "rooms": {"data": [{"type": "rooms", "id": str(room.id)} for room in self.rooms]},
+ "samples": {"data": [{"type": "items", "id": str(item.id)} for item in self.samples]},
+ "topics": {"data": [{"type": "floor-topics", "id": str(topic.id)} for topic in self.topics]},
},
- 'relationships': {
- 'rooms': {
- 'data': [
- {
- 'type': 'rooms',
- 'id': str(room.id)
- }
- for room in self.rooms
- ]
- },
- 'samples': {
- 'data': [
- {
- 'type': 'items',
- 'id': str(item.id)
- }
- for item in self.samples
- ]
- },
- 'topics': {
- 'data': [
- {
- 'type': 'floor-topics',
- 'id': str(topic.id)
- }
- for topic in self.topics
- ]
- }
- }
}
class FloorTopic(Base):
+ __tablename__ = "floor_topics"
- __tablename__ = 'floor_topics'
-
- id = Column(Integer, primary_key=True)
- group_id = Column(Integer, ForeignKey('groups.id'))
- floor_id = Column(Integer, ForeignKey('floors.id'))
+ id = Column(Integer, primary_key=True) # noqa: A003
+ group_id = Column(Integer, ForeignKey("groups.id"))
+ floor_id = Column(Integer, ForeignKey("floors.id"))
label = Column(Unicode(255))
size = Column(Integer)
- group = relationship('Group')
- floor = relationship('Floor', back_populates='topics')
+ group = relationship("Group")
+ floor = relationship("Floor", back_populates="topics")
def as_jsonapi(self):
return {
- 'type': 'floor-topics',
- 'id': str(self.id),
- 'attributes': {
- 'label': self.label,
- 'size': self.size,
+ "type": "floor-topics",
+ "id": str(self.id),
+ "attributes": {
+ "label": self.label,
+ "size": self.size,
+ },
+ "relationships": {
+ "group": {"data": {"type": "groups", "id": str(self.group_id)}},
+ "floor": {"data": {"type": "floors", "id": str(self.floor_id)}},
},
- 'relationships': {
- 'group': {
- 'data': {
- 'type': 'groups',
- 'id': str(self.group_id)
- }
- },
- 'floor': {
- 'data': {
- 'type': 'floors',
- 'id': str(self.floor_id)
- }
- }
- }
}
diff --git a/museum_map/models/group.py b/museum_map/models/group.py
index 957567f..1e1ac5e 100644
--- a/museum_map/models/group.py
+++ b/museum_map/models/group.py
@@ -1,34 +1,32 @@
-from sqlalchemy import (Column, Integer, Unicode, ForeignKey, Index)
+from sqlalchemy import Column, ForeignKey, Index, Integer, Unicode
from sqlalchemy.orm import relationship
-from sqlalchemy_json import NestedMutableJson
-from .base import Base
+from museum_map.models.base import Base
class Group(Base):
+ __tablename__ = "groups"
- __tablename__ = 'groups'
-
- id = Column(Integer, primary_key=True)
- parent_id = Column(Integer, ForeignKey('groups.id'))
+ id = Column(Integer, primary_key=True) # noqa: A003
+ parent_id = Column(Integer, ForeignKey("groups.id"))
value = Column(Unicode(255))
label = Column(Unicode(255))
split = Column(Unicode(64))
- parent = relationship('Group', remote_side=[id], back_populates='children', uselist=False)
- children = relationship('Group', remote_side=[parent_id])
- items = relationship('Item', back_populates='group')
- room = relationship('Room', back_populates='group', uselist=False)
+ parent = relationship("Group", remote_side=[id], back_populates="children", uselist=False)
+ children = relationship("Group", remote_side=[parent_id])
+ items = relationship("Item", back_populates="group")
+ room = relationship("Room", back_populates="group", uselist=False)
def as_jsonapi(self):
return {
- 'type': 'groups',
- 'id': str(self.id),
- 'attributes': {
- 'value': self.value,
- 'label': self.label,
- 'split': self.split,
- }
+ "type": "groups",
+ "id": str(self.id),
+ "attributes": {
+ "value": self.value,
+ "label": self.label,
+ "split": self.split,
+ },
}
diff --git a/museum_map/models/item.py b/museum_map/models/item.py
index 3dc2299..6708ada 100644
--- a/museum_map/models/item.py
+++ b/museum_map/models/item.py
@@ -1,37 +1,26 @@
-from sqlalchemy import (Column, Integer, ForeignKey, Index)
+from sqlalchemy import Column, ForeignKey, Index, Integer
from sqlalchemy.orm import relationship
from sqlalchemy_json import NestedMutableJson
-from .base import Base
+from museum_map.models.base import Base
class Item(Base):
+ __tablename__ = "items"
- __tablename__ = 'items'
-
- id = Column(Integer, primary_key=True)
- group_id = Column(Integer, ForeignKey('groups.id'))
- room_id = Column(Integer, ForeignKey('rooms.id'))
+ id = Column(Integer, primary_key=True) # noqa: A003
+ group_id = Column(Integer, ForeignKey("groups.id"))
+ room_id = Column(Integer, ForeignKey("rooms.id"))
attributes = Column(NestedMutableJson)
sequence = Column(Integer)
- group = relationship('Group', back_populates='items')
- room = relationship('Room', back_populates='items', primaryjoin='Item.room_id == Room.id')
+ group = relationship("Group", back_populates="items")
+ room = relationship("Room", back_populates="items", primaryjoin="Item.room_id == Room.id")
def as_jsonapi(self):
- data = {
- 'type': 'items',
- 'id': str(self.id),
- 'attributes': self.attributes,
- 'relationships': {}
- }
- if (self.room):
- data['relationships']['room'] = {
- 'data': {
- 'type': 'rooms',
- 'id': str(self.room_id)
- }
- }
+ data = {"type": "items", "id": str(self.id), "attributes": self.attributes, "relationships": {}}
+ if self.room:
+ data["relationships"]["room"] = {"data": {"type": "rooms", "id": str(self.room_id)}}
return data
diff --git a/museum_map/models/room.py b/museum_map/models/room.py
index 6b1b4e5..f185571 100644
--- a/museum_map/models/room.py
+++ b/museum_map/models/room.py
@@ -1,64 +1,38 @@
-from sqlalchemy import (Column, Integer, Unicode, ForeignKey, Index)
+from sqlalchemy import Column, ForeignKey, Index, Integer, Unicode
from sqlalchemy.orm import relationship
from sqlalchemy_json import NestedMutableJson
-from .base import Base
+from museum_map.models.base import Base
class Room(Base):
+ __tablename__ = "rooms"
- __tablename__ = 'rooms'
-
- id = Column(Integer, primary_key=True)
- floor_id = Column(Integer, ForeignKey('floors.id'))
- group_id = Column(Integer, ForeignKey('groups.id'))
- item_id = Column(Integer, ForeignKey('items.id'))
+ id = Column(Integer, primary_key=True) # noqa: A003
+ floor_id = Column(Integer, ForeignKey("floors.id"))
+ group_id = Column(Integer, ForeignKey("groups.id"))
+ item_id = Column(Integer, ForeignKey("items.id"))
number = Column(Unicode(16))
label = Column(Unicode(255))
position = Column(NestedMutableJson)
- group = relationship('Group', back_populates='room')
- floor = relationship('Floor', back_populates='rooms')
- sample = relationship('Item', primaryjoin='Room.item_id == Item.id')
- items = relationship('Item',
- back_populates='room',
- order_by='Item.sequence',
- primaryjoin='Room.id == Item.room_id')
+ group = relationship("Group", back_populates="room")
+ floor = relationship("Floor", back_populates="rooms")
+ sample = relationship("Item", primaryjoin="Room.item_id == Item.id")
+ items = relationship("Item", back_populates="room", order_by="Item.sequence", primaryjoin="Room.id == Item.room_id")
def as_jsonapi(self):
data = {
- 'type': 'rooms',
- 'id': str(self.id),
- 'attributes': {
- 'number': self.number,
- 'label': self.label,
- 'position': self.position
+ "type": "rooms",
+ "id": str(self.id),
+ "attributes": {"number": self.number, "label": self.label, "position": self.position},
+ "relationships": {
+ "floor": {"data": {"type": "floors", "id": str(self.floor_id)}},
+ "items": {"data": [{"type": "items", "id": str(item.id)} for item in self.items]},
},
- 'relationships': {
- 'floor': {
- 'data': {
- 'type': 'floors',
- 'id': str(self.floor_id)
- }
- },
- 'items': {
- 'data': [
- {
- 'type': 'items',
- 'id': str(item.id)
- }
- for item in self.items
- ]
- }
- }
}
if self.sample:
- data['relationships']['sample'] = {
- 'data': {
- 'type': 'items',
- 'id': str(self.sample.id)
- }
- }
+ data["relationships"]["sample"] = {"data": {"type": "items", "id": str(self.sample.id)}}
return data
diff --git a/museum_map/server/handlers.py b/museum_map/server/handlers.py
index eeb7aed..c70bf2e 100644
--- a/museum_map/server/handlers.py
+++ b/museum_map/server/handlers.py
@@ -1,22 +1,17 @@
import logging
import math
import re
-
-from configparser import ConfigParser
-from datetime import datetime
-from importlib import resources
+from datetime import datetime, timezone
from importlib.abc import Traversable
-from meilisearch_python_async import Client
from mimetypes import guess_type
-from random import randint
-from sqlalchemy import select, func
-from sqlalchemy.orm import selectinload, noload
-from sqlalchemy.ext.asyncio import AsyncSession
+
+from meilisearch_python_async import Client
+from sqlalchemy import func, select
+from sqlalchemy.orm import noload, selectinload
from tornado import web
from museum_map.__about__ import __version__
-from ..models import create_sessionmaker, Floor, FloorTopic, Room, Group, Item
-
+from museum_map.models import Floor, FloorTopic, Group, Item, Room, create_sessionmaker
logger = logging.getLogger(__name__)
@@ -28,21 +23,21 @@ def setup_query(types, multi_load):
multi_loader = selectinload
else:
multi_loader = noload
- if types == 'rooms':
+ if types == "rooms":
query = select(Room).options(selectinload(Room.floor), multi_loader(Room.items), selectinload(Room.sample))
class_ = Room
- elif types == 'floors':
+ elif types == "floors":
query = select(Floor).options(
multi_loader(Floor.rooms), multi_loader(Floor.samples), multi_loader(Floor.topics)
)
class_ = Floor
- elif types == 'items':
+ elif types == "items":
query = select(Item).options(selectinload(Item.room))
class_ = Item
- elif types == 'floor-topics':
+ elif types == "floor-topics":
query = select(FloorTopic).options(selectinload(FloorTopic.group), selectinload(FloorTopic.floor))
class_ = FloorTopic
- elif types == 'groups':
+ elif types == "groups":
query = select(Group)
class_ = Group
return (query, class_)
@@ -50,54 +45,54 @@ def setup_query(types, multi_load):
class APIStatusHandler(web.RequestHandler):
async def get(self):
- async with create_sessionmaker(self.application.settings['config'])() as session:
+ async with create_sessionmaker(self.application.settings["config"])():
ready = False
- self.write({'version': __version__, 'ready': ready})
+ self.write({"version": __version__, "ready": ready})
class RequestBase(web.RequestHandler):
def setup_query(self, types):
- return setup_query(types, not self.get_argument('relationships', 'true').lower() == 'false')
+ return setup_query(types, not self.get_argument("relationships", "true").lower() == "false")
class APICollectionHandler(RequestBase):
async def get(self, types):
- async with create_sessionmaker(self.application.settings['config'])() as session:
+ async with create_sessionmaker(self.application.settings["config"])() as session:
query, class_ = self.setup_query(types)
if query is not None and class_ is not None:
for key, values in self.request.arguments.items():
- if key.startswith('filter['):
- column = key[key.find('[') + 1 : key.find(']')]
- if values == '':
+ if key.startswith("filter["):
+ column = key[key.find("[") + 1 : key.find("]")]
+ if values == "":
query = query.filter(getattr(class_, column).in_([]))
else:
for value in values:
- value = value.decode()
- if value == '':
+ value = value.decode() # noqa: PLW2901
+ if value == "":
query = query.filter(getattr(class_, column).in_([]))
else:
- split_values = [int(v) for v in value.split(',')]
+ split_values = [int(v) for v in value.split(",")]
if len(split_values) == 1:
query = query.filter(getattr(class_, column) == split_values[0])
else:
query = query.filter(getattr(class_, column).in_(split_values))
result = await session.execute(query)
items = [item.as_jsonapi() for item in result.unique().scalars()]
- self.write({'data': items})
+ self.write({"data": items})
else:
self.send_error(status_code=404)
class APIItemHandler(RequestBase):
async def get(self, types, identifier):
- async with create_sessionmaker(self.application.settings['config'])() as session:
+ async with create_sessionmaker(self.application.settings["config"])() as session:
query, class_ = self.setup_query(types)
if query is not None and class_ is not None:
- query = query.filter(getattr(class_, 'id') == int(identifier))
+ query = query.filter(class_.id == int(identifier))
item = (await session.execute(query)).scalars().first()
if item is not None:
- self.write({'data': item.as_jsonapi()})
+ self.write({"data": item.as_jsonapi()})
else:
self.send_error(status_code=404)
else:
@@ -110,39 +105,39 @@ def initialize(self, config: dict) -> None:
async def get(self):
attributes = {
- 'intro': self._config['app']['intro'],
- 'item': self._config['app']['item'],
+ "intro": self._config["app"]["intro"],
+ "item": self._config["app"]["item"],
}
- if 'footer' in self._config['app']:
- for footer_location in ['center', 'right']:
- if footer_location in self._config['app']['footer']:
- if 'footer' not in attributes:
- attributes['footer'] = {}
- attributes['footer'][footer_location] = {
- 'label': self._config['app']['footer'][footer_location]['label']
+ if "footer" in self._config["app"]:
+ for footer_location in ["center", "right"]:
+ if footer_location in self._config["app"]["footer"]:
+ if "footer" not in attributes:
+ attributes["footer"] = {}
+ attributes["footer"][footer_location] = {
+ "label": self._config["app"]["footer"][footer_location]["label"]
}
- if 'url' in self._config['app']['footer'][footer_location]:
- attributes['footer'][footer_location]['url'] = self._config['app']['footer'][footer_location][
- 'url'
+ if "url" in self._config["app"]["footer"][footer_location]:
+ attributes["footer"][footer_location]["url"] = self._config["app"]["footer"][footer_location][
+ "url"
]
- self.write({'data': {'id': 'all', 'type': 'configs', 'attributes': attributes}})
+ self.write({"data": {"id": "all", "type": "configs", "attributes": attributes}})
class APIPickHandler(RequestBase):
- async def get(self, type):
- if type in ['random', 'todays']:
- async with create_sessionmaker(self.application.settings['config'])() as session:
- query, class_ = self.setup_query('items')
+ async def get(self, pick_type):
+ if pick_type in ["random", "todays"]:
+ async with create_sessionmaker(self.application.settings["config"])() as session:
+ query, class_ = self.setup_query("items")
if query is not None and class_ is not None:
- if type == 'random':
+ if pick_type == "random":
query = query.order_by(func.random()).limit(12)
- elif type == 'todays':
+ elif pick_type == "todays":
total = (await session.execute(select(func.count()).select_from(class_))).scalars().first()
- row_nr = (math.floor(datetime.utcnow().timestamp() / 86400) % total) + 1
- query = query.order_by(getattr(class_, 'id')).offset(row_nr).limit(1)
+ row_nr = (math.floor(datetime.now(tz=timezone.utc).timestamp() / 86400) % total) + 1
+ query = query.order_by(class_.id).offset(row_nr).limit(1)
result = await session.execute(query)
items = [item.as_jsonapi() for item in result.scalars()]
- self.write({'data': items})
+ self.write({"data": items})
else:
self.send_error(status_code=404)
else:
@@ -150,27 +145,27 @@ async def get(self, type):
class APISearchHandler(RequestBase):
- def initialize(self: 'APISearchHandler'):
+ def initialize(self: "APISearchHandler"):
self._client = Client(
- self.application.settings['config']['search']['url'], self.application.settings['config']['search']['key']
+ self.application.settings["config"]["search"]["url"], self.application.settings["config"]["search"]["key"]
)
self._index = None
- async def get(self: 'APISearchHandler'):
+ async def get(self: "APISearchHandler"):
if self._index is None:
- self._index = await self._client.get_index('items')
+ self._index = await self._client.get_index("items")
result = await self._index.search(
- self.get_argument('q'),
+ self.get_argument("q"),
limit=150,
- facets=['mmap_room', 'mmap_floor'],
+ facets=["mmap_room", "mmap_floor"],
filter=[f'mmap_room = {self.get_argument("room")}']
- if self.get_argument('room', default=None) != None
+ if self.get_argument("room", default=None) is not None
else [],
)
self.write(
{
- 'hits': result.hits,
- 'facetDistribution': result.facet_distribution,
+ "hits": result.hits,
+ "facetDistribution": result.facet_distribution,
}
)
@@ -178,7 +173,7 @@ async def get(self: 'APISearchHandler'):
class FrontendHandler(web.RedirectHandler):
"""Handler for the frontend application files."""
- def initialize(self: 'FrontendHandler', base: Traversable, html_injectors: dict = None) -> None:
+ def initialize(self: "FrontendHandler", base: Traversable, html_injectors: dict | None = None) -> None:
"""Initialise the frontend handler."""
self._base = base
if html_injectors:
@@ -186,25 +181,25 @@ def initialize(self: 'FrontendHandler', base: Traversable, html_injectors: dict
else:
self._html_injectors = {}
- async def get(self: 'FrontendHandler', path: str) -> None:
+ async def get(self: "FrontendHandler", path: str) -> None:
"""Get the file at the given path.
:param path: The path to get.
:type: path: str
"""
- self.xsrf_token
+ self.xsrf_token # noqa: B018
if not path.strip():
- path = '/'
+ path = "/"
try:
- logger.debug(f'Attempting to send {path}')
- await self._get_resource(self._base, path.split('/'))
+ logger.debug(f"Attempting to send {path}")
+ await self._get_resource(self._base, path.split("/"))
except FileNotFoundError:
- logger.debug('Sending index.html')
- await self._get_resource(self._base, ('index.html',), orig_path=path)
+ logger.debug("Sending index.html")
+ await self._get_resource(self._base, ("index.html",), orig_path=path)
async def _get_resource(
- self: 'FrontendHandler', resource: Traversable, path: list[str], orig_path: str = None
- ) -> None: # noqa: E501
+ self: "FrontendHandler", resource: Traversable, path: list[str], orig_path: str | None = None
+ ) -> None:
"""Send a file.
Performs mimetype guessing and sets the appropriate Content-Type header.
@@ -224,16 +219,16 @@ async def _get_resource(
for key, injector in self._html_injectors.items():
match = re.match(key, orig_path)
if match:
- html = data.decode('utf-8')
- split_idx = html.find('</head>')
- html = f'{html[:split_idx]}{await injector(*match.groups())}{html[split_idx:]}'
- data = html.encode('utf-8')
+ html = data.decode("utf-8")
+ split_idx = html.find("</head>")
+ html = f"{html[:split_idx]}{await injector(*match.groups())}{html[split_idx:]}"
+ data = html.encode("utf-8")
mimetype = guess_type(path[-1])
if mimetype and mimetype[0]:
- self.set_header('Content-Type', mimetype[0])
+ self.set_header("Content-Type", mimetype[0])
self.write(data)
- except IsADirectoryError:
- raise FileNotFoundError()
+ except IsADirectoryError as err:
+ raise FileNotFoundError() from err
def create_inject_item_html(config):
@@ -242,11 +237,11 @@ def create_inject_item_html(config):
async def inject_item_html(room_id: str, joke_id: str) -> str:
try:
async with create_sessionmaker(config)() as session:
- query, class_ = setup_query('items', False)
- query = query.filter(getattr(class_, 'id') == int(joke_id))
+ query, class_ = setup_query("items", False)
+ query = query.filter(class_.id == int(joke_id))
item = (await session.execute(query)).scalar()
if item:
- return f'''
+ return f"""
@@ -255,9 +250,9 @@ async def inject_item_html(room_id: str, joke_id: str) -> str:
-'''
- except Exception:
+"""
+ except Exception: # noqa: S110
pass
- return ''
+ return ""
return inject_item_html