From 5cfdd82a6b6e982b4e5ef3a443f00535ca0cba78 Mon Sep 17 00:00:00 2001 From: Mat Moore Date: Thu, 9 Jan 2025 14:00:02 +0000 Subject: [PATCH 1/2] refactor: rename domain -> subject area --- home/forms/search.py | 22 +++---- ...main_model.py => subject_area_taxonomy.py} | 12 ++-- home/service/search.py | 32 ++++++----- ...ain_fetcher.py => subject_area_fetcher.py} | 6 +- home/views.py | 14 ++--- .../client/datahub_client.py | 57 +++++++++---------- .../client/search/search_client.py | 10 ++-- .../data_platform_catalogue/search_types.py | 4 +- .../test_integration_with_datahub_server.py | 10 +--- tests/conftest.py | 20 +++---- 10 files changed, 94 insertions(+), 93 deletions(-) rename home/models/{domain_model.py => subject_area_taxonomy.py} (54%) rename home/service/{domain_fetcher.py => subject_area_fetcher.py} (86%) diff --git a/home/forms/search.py b/home/forms/search.py index 125e9167..cfa8e194 100644 --- a/home/forms/search.py +++ b/home/forms/search.py @@ -2,22 +2,24 @@ from urllib.parse import urlencode from data_platform_catalogue.entities import FindMoJdataEntityType -from data_platform_catalogue.search_types import DomainOption +from data_platform_catalogue.search_types import SubjectAreaOption from django import forms -from ..models.domain_model import Domain -from ..service.domain_fetcher import DomainFetcher +from ..models.subject_area_taxonomy import SubjectArea from ..service.search_tag_fetcher import SearchTagFetcher +from ..service.subject_area_fetcher import SubjectAreaFetcher -def get_domain_choices() -> list[Domain]: - """Make Domains API call to obtain domain choices""" +def get_subject_area_choices() -> list[SubjectArea]: + """Make Domains API call to obtain subject area choices""" choices = [ - Domain("", "All subject areas"), + SubjectArea("", "All subject areas"), ] - list_domain_options: list[DomainOption] = DomainFetcher().fetch() - domains: list[Domain] = [Domain(d.urn, d.name) for d in list_domain_options] - choices.extend(domains) + subject_area_options: list[SubjectAreaOption] = SubjectAreaFetcher().fetch() + subject_areas: list[SubjectArea] = [ + SubjectArea(d.urn, d.name) for d in subject_area_options + ] + choices.extend(subject_areas) return choices @@ -64,7 +66,7 @@ class SearchForm(forms.Form): ), ) domain = forms.ChoiceField( - choices=get_domain_choices, + choices=get_subject_area_choices, required=False, widget=forms.Select( attrs={ diff --git a/home/models/domain_model.py b/home/models/subject_area_taxonomy.py similarity index 54% rename from home/models/domain_model.py rename to home/models/subject_area_taxonomy.py index b06cdb3f..86666d3b 100644 --- a/home/models/domain_model.py +++ b/home/models/subject_area_taxonomy.py @@ -1,21 +1,23 @@ import logging from typing import NamedTuple -from data_platform_catalogue.search_types import DomainOption +from data_platform_catalogue.search_types import SubjectAreaOption logger = logging.getLogger(__name__) -class Domain(NamedTuple): +class SubjectArea(NamedTuple): urn: str label: str -class DomainModel: - def __init__(self, domains: list[DomainOption]): +class SubjectAreaTaxonomy: + def __init__(self, domains: list[SubjectAreaOption]): self.labels = {} - self.top_level_domains = [Domain(domain.urn, domain.name) for domain in domains] + self.top_level_domains = [ + SubjectArea(domain.urn, domain.name) for domain in domains + ] logger.info(f"{self.top_level_domains=}") for urn, label in self.top_level_domains: diff --git a/home/service/search.py b/home/service/search.py index 4defabe1..4f1d8af1 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -4,26 +4,26 @@ from data_platform_catalogue.entities import FindMoJdataEntityMapper, Mappers from data_platform_catalogue.search_types import ( - DomainOption, MultiSelectFilter, SearchResponse, SortOption, + SubjectAreaOption, ) from django.conf import settings from django.core.paginator import Paginator from nltk.stem import PorterStemmer from home.forms.search import SearchForm -from home.models.domain_model import DomainModel +from home.models.subject_area_taxonomy import SubjectAreaTaxonomy from .base import GenericService -from .domain_fetcher import DomainFetcher +from .subject_area_fetcher import SubjectAreaFetcher class SearchService(GenericService): def __init__(self, form: SearchForm, page: str, items_per_page: int = 20): - domains: list[DomainOption] = DomainFetcher().fetch() - self.domain_model = DomainModel(domains) + subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch() + self.subject_area_taxonomy = SubjectAreaTaxonomy(subject_areas) self.stemmer = PorterStemmer() self.form = form if self.form.is_bound: @@ -77,7 +77,7 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: else "ascending" ) - domain = form_data.get("domain", "") + subject_area = form_data.get("domain", "") tags = form_data.get("tags", "") where_to_access = self._build_custom_property_filter( "dc_where_to_access_dataset=", form_data.get("where_to_access", []) @@ -85,8 +85,8 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: entity_types = self._build_entity_types(form_data.get("entity_types", [])) filter_value = [] - if domain: - filter_value.append(MultiSelectFilter("domains", [domain])) + if subject_area: + filter_value.append(MultiSelectFilter("domains", [subject_area])) if where_to_access: filter_value.append(MultiSelectFilter("customProperties", where_to_access)) if tags: @@ -120,13 +120,15 @@ def _get_paginator(self, items_per_page: int) -> Paginator: def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None: if self.form.is_bound: - domain = self.form.cleaned_data.get("domain", "") + subject_area = self.form.cleaned_data.get("domain", "") entity_types = self.form.cleaned_data.get("entity_types", []) where_to_access = self.form.cleaned_data.get("where_to_access", []) tags = self.form.cleaned_data.get("tags", []) remove_filter_hrefs = {} - if domain: - remove_filter_hrefs["Subject area"] = self._generate_domain_clear_href() + if subject_area: + remove_filter_hrefs["Subject area"] = ( + self._generate_subject_area_clear_href() + ) if entity_types: entity_types_clear_href = {} for entity_type in entity_types: @@ -159,17 +161,17 @@ def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None: return remove_filter_hrefs - def _generate_domain_clear_href( + def _generate_subject_area_clear_href( self, ) -> dict[str, str]: - domain = self.form.cleaned_data.get("domain", "") + subject_area = self.form.cleaned_data.get("domain", "") - label = self.domain_model.get_label(domain) + label = self.subject_area_taxonomy.get_label(subject_area) return { label: ( self.form.encode_without_filter( - filter_name="domain", filter_value=domain + filter_name="domain", filter_value=subject_area ) ) } diff --git a/home/service/domain_fetcher.py b/home/service/subject_area_fetcher.py similarity index 86% rename from home/service/domain_fetcher.py rename to home/service/subject_area_fetcher.py index f15e3e3c..ab2be6fc 100644 --- a/home/service/domain_fetcher.py +++ b/home/service/subject_area_fetcher.py @@ -1,10 +1,10 @@ -from data_platform_catalogue.search_types import DomainOption +from data_platform_catalogue.search_types import SubjectAreaOption from django.core.cache import cache from .base import GenericService -class DomainFetcher(GenericService): +class SubjectAreaFetcher(GenericService): """ DomainFetcher implementation to fetch domains with the total number of associated entities from the backend. @@ -16,7 +16,7 @@ def __init__(self, filter_zero_entities: bool = True): self.cache_timeout_seconds = 300 self.filter_zero_entities = filter_zero_entities - def fetch(self) -> list[DomainOption]: + def fetch(self) -> list[SubjectAreaOption]: """ Fetch a static list of options that is independent of the search query and any applied filters. Values are cached for 5 seconds to avoid diff --git a/home/views.py b/home/views.py index 1108b925..40aa4c5b 100644 --- a/home/views.py +++ b/home/views.py @@ -11,7 +11,7 @@ PublicationDatasetEntityMapping, TableEntityMapping, ) -from data_platform_catalogue.search_types import DomainOption +from data_platform_catalogue.search_types import SubjectAreaOption from django.conf import settings from django.http import Http404, HttpResponse, HttpResponseBadRequest from django.shortcuts import render @@ -31,10 +31,10 @@ DatabaseDetailsCsvFormatter, DatasetDetailsCsvFormatter, ) -from home.service.domain_fetcher import DomainFetcher from home.service.glossary import GlossaryService from home.service.metadata_specification import MetadataSpecificationService from home.service.search import SearchService +from home.service.subject_area_fetcher import SubjectAreaFetcher type_details_map = { TableEntityMapping.url_formatted: DatasetDetailsService, @@ -49,10 +49,10 @@ @cache_control(max_age=300, private=True) def home_view(request): """ - Displys only domains that have entities tagged for display in the catalog. + Displys only subject areas that have entities tagged for display in the catalog. """ - domains: list[DomainOption] = DomainFetcher().fetch() - context = {"domains": domains, "h1_value": "Home"} + subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch() + context = {"domains": subject_areas, "h1_value": "Home"} return render(request, "home.html", context) @@ -130,7 +130,7 @@ def metadata_specification_view(request): def cookies_view(request): - valid_domains = [ + valid_subject_areas = [ urlparse(origin).netloc for origin in settings.CSRF_TRUSTED_ORIGINS ] referer = request.META.get("HTTP_REFERER") @@ -139,7 +139,7 @@ def cookies_view(request): referer_domain = urlparse(referer).netloc # Validate this referer domain against declared valid domains - if referer_domain not in valid_domains: + if referer_domain not in valid_subject_areas: referer = "/" # Set to home page if invalid context = { diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index d0f01bd6..90dd5031 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -3,33 +3,6 @@ from importlib.resources import files from typing import Sequence -from datahub.configuration.common import ConfigurationError -from datahub.emitter import mce_builder -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.ingestion.source.common.subtypes import ( - DatasetContainerSubTypes, - DatasetSubTypes, -) -from datahub.metadata import schema_classes -from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance -from datahub.metadata.schema_classes import ( - ChangeTypeClass, - ContainerClass, - ContainerPropertiesClass, - DatasetPropertiesClass, - DomainPropertiesClass, - DomainsClass, - OtherSchemaClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, - SchemaFieldClass, - SchemaFieldDataTypeClass, - SchemaMetadataClass, - SubTypesClass, -) - from data_platform_catalogue.client.exceptions import ( AspectDoesNotExist, ConnectivityError, @@ -75,10 +48,36 @@ TableEntityMapping, ) from data_platform_catalogue.search_types import ( - DomainOption, MultiSelectFilter, SearchResponse, SortOption, + SubjectAreaOption, +) +from datahub.configuration.common import ConfigurationError +from datahub.emitter import mce_builder +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.source.common.subtypes import ( + DatasetContainerSubTypes, + DatasetSubTypes, +) +from datahub.metadata import schema_classes +from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + ContainerClass, + ContainerPropertiesClass, + DatasetPropertiesClass, + DomainPropertiesClass, + DomainsClass, + OtherSchemaClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + SubTypesClass, ) logger = logging.getLogger(__name__) @@ -234,7 +233,7 @@ def list_domains( query: str = "*", filters: Sequence[MultiSelectFilter] | None = None, count: int = 1000, - ) -> list[DomainOption]: + ) -> list[SubjectAreaOption]: """ Returns a list of DomainOption objects """ diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index d338a4a4..ba473c5e 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -28,13 +28,13 @@ TableEntityMapping, ) from data_platform_catalogue.search_types import ( - DomainOption, FacetOption, MultiSelectFilter, SearchFacets, SearchResponse, SearchResult, SortOption, + SubjectAreaOption, ) from datahub.configuration.common import GraphError # pylint: disable=E0611 from datahub.ingestion.graph.client import DataHubGraph # pylint: disable=E0611 @@ -220,7 +220,7 @@ def list_domains( query: str = "*", filters: Sequence[MultiSelectFilter] | None = None, count: int = 1000, - ) -> list[DomainOption]: + ) -> list[SubjectAreaOption]: """ Returns domains that can be used to filter the search results. """ @@ -272,8 +272,8 @@ def _map_result_types( def _parse_list_domains( self, list_domains_result: list[dict[str, Any]] - ) -> list[DomainOption]: - list_domain_options: list[DomainOption] = [] + ) -> list[SubjectAreaOption]: + list_domain_options: list[SubjectAreaOption] = [] for domain in list_domains_result: urn = domain.get("urn", "") @@ -282,7 +282,7 @@ def _parse_list_domains( entities = domain.get("entities", {}) total = entities.get("total", 0) - list_domain_options.append(DomainOption(urn, name, total)) + list_domain_options.append(SubjectAreaOption(urn, name, total)) return list_domain_options def _parse_dataset( diff --git a/lib/datahub-client/data_platform_catalogue/search_types.py b/lib/datahub-client/data_platform_catalogue/search_types.py index fd459d43..af7ce49b 100644 --- a/lib/datahub-client/data_platform_catalogue/search_types.py +++ b/lib/datahub-client/data_platform_catalogue/search_types.py @@ -6,8 +6,8 @@ from data_platform_catalogue.entities import ( EntityRef, - GlossaryTermRef, FindMoJdataEntityMapper, + GlossaryTermRef, TagRef, ) @@ -50,7 +50,7 @@ class FacetOption: @dataclass -class DomainOption: +class SubjectAreaOption: """ A representation of a domain and the number of associated entities represented by total. diff --git a/lib/datahub-client/tests/test_integration_with_datahub_server.py b/lib/datahub-client/tests/test_integration_with_datahub_server.py index b2f7648a..fee981fd 100644 --- a/lib/datahub-client/tests/test_integration_with_datahub_server.py +++ b/lib/datahub-client/tests/test_integration_with_datahub_server.py @@ -12,7 +12,6 @@ from datetime import datetime import pytest - from data_platform_catalogue.client.datahub_client import DataHubCatalogueClient from data_platform_catalogue.entities import ( AccessInformation, @@ -20,16 +19,13 @@ Database, DomainRef, EntityRef, - TableEntityMapping, Governance, OwnerRef, + TableEntityMapping, TagRef, UsageRestrictions, ) -from data_platform_catalogue.search_types import ( - DomainOption, - MultiSelectFilter, -) +from data_platform_catalogue.search_types import MultiSelectFilter, SubjectAreaOption jwt_token = os.environ.get("CATALOGUE_TOKEN") api_url = os.environ.get("CATALOGUE_URL", "") @@ -42,7 +38,7 @@ def test_list_domains(): response = client.list_domains() assert len(response) > 0 domain = response[0] - assert isinstance(domain, DomainOption) + assert isinstance(domain, SubjectAreaOption) assert "urn:li:domain" in domain.urn diff --git a/tests/conftest.py b/tests/conftest.py index 554b8d2a..357f7fbe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,9 +30,9 @@ TagRef, ) from data_platform_catalogue.search_types import ( - DomainOption, SearchResponse, SearchResult, + SubjectAreaOption, ) from django.conf import settings from django.test import Client @@ -46,11 +46,11 @@ from selenium.webdriver.support.select import Select from home.forms.search import SearchForm -from home.models.domain_model import DomainModel +from home.models.subject_area_taxonomy import SubjectAreaTaxonomy from home.service.details import DatabaseDetailsService -from home.service.domain_fetcher import DomainFetcher from home.service.search import SearchService from home.service.search_tag_fetcher import SearchTagFetcher +from home.service.subject_area_fetcher import SubjectAreaFetcher fake = Faker() @@ -812,22 +812,22 @@ def mock_catalogue( mock_list_domains_response( mock_catalogue, domains=[ - DomainOption( + SubjectAreaOption( urn="urn:li:domain:prisons", name="Prisons", total=fake.random_int(min=1, max=100), ), - DomainOption( + SubjectAreaOption( urn="urn:li:domain:courts", name="Courts", total=fake.random_int(min=1, max=100), ), - DomainOption( + SubjectAreaOption( urn="urn:li:domain:finance", name="Finance", total=fake.random_int(min=1, max=100), ), - DomainOption( + SubjectAreaOption( urn="urn:li:domain:hq", name="HQ", total=0, @@ -959,7 +959,7 @@ def mock_get_publication_dataset_details_response( @pytest.fixture def list_domains(filter_zero_entities): - return DomainFetcher(filter_zero_entities).fetch() + return SubjectAreaFetcher(filter_zero_entities).fetch() @pytest.fixture @@ -969,8 +969,8 @@ def search_tags(): @pytest.fixture def valid_domain(): - domains = DomainFetcher().fetch() - return DomainModel( + domains = SubjectAreaFetcher().fetch() + return SubjectAreaTaxonomy( domains, ).top_level_domains[0] From 43efd680300fb28aedb64b59f07abe175065b539 Mon Sep 17 00:00:00 2001 From: Mat Moore Date: Thu, 9 Jan 2025 14:56:04 +0000 Subject: [PATCH 2/2] refactor: further renaming --- home/models/subject_area_taxonomy.py | 10 +++++----- home/service/subject_area_fetcher.py | 2 +- .../data_platform_catalogue/search_types.py | 2 +- tests/conftest.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/home/models/subject_area_taxonomy.py b/home/models/subject_area_taxonomy.py index 86666d3b..3ec34199 100644 --- a/home/models/subject_area_taxonomy.py +++ b/home/models/subject_area_taxonomy.py @@ -12,15 +12,15 @@ class SubjectArea(NamedTuple): class SubjectAreaTaxonomy: - def __init__(self, domains: list[SubjectAreaOption]): + def __init__(self, subject_areas: list[SubjectAreaOption]): self.labels = {} - self.top_level_domains = [ - SubjectArea(domain.urn, domain.name) for domain in domains + self.top_level_subject_areas = [ + SubjectArea(domain.urn, domain.name) for domain in subject_areas ] - logger.info(f"{self.top_level_domains=}") + logger.info(f"{self.top_level_subject_areas=}") - for urn, label in self.top_level_domains: + for urn, label in self.top_level_subject_areas: self.labels[urn] = label def get_label(self, urn): diff --git a/home/service/subject_area_fetcher.py b/home/service/subject_area_fetcher.py index ab2be6fc..a279a2d0 100644 --- a/home/service/subject_area_fetcher.py +++ b/home/service/subject_area_fetcher.py @@ -29,5 +29,5 @@ def fetch(self) -> list[SubjectAreaOption]: cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds) if self.filter_zero_entities: - result = [domain for domain in result if domain.total > 0] + result = [subject_area for subject_area in result if subject_area.total > 0] return result diff --git a/lib/datahub-client/data_platform_catalogue/search_types.py b/lib/datahub-client/data_platform_catalogue/search_types.py index af7ce49b..42d6bd78 100644 --- a/lib/datahub-client/data_platform_catalogue/search_types.py +++ b/lib/datahub-client/data_platform_catalogue/search_types.py @@ -52,7 +52,7 @@ class FacetOption: @dataclass class SubjectAreaOption: """ - A representation of a domain and the number of associated entities + A representation of a subject area and the number of associated entities represented by total. """ diff --git a/tests/conftest.py b/tests/conftest.py index 357f7fbe..856e75a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -972,7 +972,7 @@ def valid_domain(): domains = SubjectAreaFetcher().fetch() return SubjectAreaTaxonomy( domains, - ).top_level_domains[0] + ).top_level_subject_areas[0] @pytest.fixture