Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: rename domain -> subject area #1215

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions home/forms/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@
from urllib.parse import urlencode

from data_platform_catalogue.entities import FindMoJdataEntityType
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django import forms

from ..models.domain_model import Domain
from ..service.domain_fetcher import DomainFetcher
from ..models.subject_area_taxonomy import SubjectArea
from ..service.search_tag_fetcher import SearchTagFetcher
from ..service.subject_area_fetcher import SubjectAreaFetcher


def get_domain_choices() -> list[Domain]:
"""Make Domains API call to obtain domain choices"""
def get_subject_area_choices() -> list[SubjectArea]:
"""Make Domains API call to obtain subject area choices"""
choices = [
Domain("", "All subject areas"),
SubjectArea("", "All subject areas"),
]
list_domain_options: list[DomainOption] = DomainFetcher().fetch()
domains: list[Domain] = [Domain(d.urn, d.name) for d in list_domain_options]
choices.extend(domains)
subject_area_options: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
subject_areas: list[SubjectArea] = [
SubjectArea(d.urn, d.name) for d in subject_area_options
]
choices.extend(subject_areas)
return choices


Expand Down Expand Up @@ -64,7 +66,7 @@ class SearchForm(forms.Form):
),
)
domain = forms.ChoiceField(
choices=get_domain_choices,
choices=get_subject_area_choices,
required=False,
widget=forms.Select(
attrs={
Expand Down
25 changes: 0 additions & 25 deletions home/models/domain_model.py

This file was deleted.

27 changes: 27 additions & 0 deletions home/models/subject_area_taxonomy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging
from typing import NamedTuple

from data_platform_catalogue.search_types import SubjectAreaOption

logger = logging.getLogger(__name__)


class SubjectArea(NamedTuple):
    """An immutable ``(urn, label)`` pair describing one subject area.

    Because this is a ``NamedTuple``, instances compare equal to plain
    2-tuples and unpack positionally as ``urn, label``.
    """

    # Identifier of the subject area (an empty string is used by the search
    # form for its "All subject areas" choice).
    urn: str
    # Human-readable name shown for the subject area.
    label: str


class SubjectAreaTaxonomy:
    """Look up subject area labels by URN.

    The taxonomy is built once from the options passed to the constructor.
    Each option is read for its ``urn`` and ``name`` attributes only.
    """

    def __init__(self, subject_areas: list[SubjectAreaOption]):
        """Index the given subject areas by URN.

        Args:
            subject_areas: Options to include; each must expose ``urn`` and
                ``name``.
        """
        self.top_level_subject_areas = [
            SubjectArea(subject_area.urn, subject_area.name)
            for subject_area in subject_areas
        ]
        # Lazy %-style arguments avoid building the message when INFO is
        # disabled; the emitted text matches the former f-string `=` form.
        logger.info(
            "self.top_level_subject_areas=%r", self.top_level_subject_areas
        )

        # If a URN appears more than once, the last label wins.
        self.labels: dict[str, str] = {
            urn: label for urn, label in self.top_level_subject_areas
        }

    def get_label(self, urn: str) -> str:
        """Return the label registered for ``urn``.

        Falls back to returning ``urn`` itself when it is not known.
        """
        return self.labels.get(urn, urn)
32 changes: 17 additions & 15 deletions home/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@

from data_platform_catalogue.entities import FindMoJdataEntityMapper, Mappers
from data_platform_catalogue.search_types import (
DomainOption,
MultiSelectFilter,
SearchResponse,
SortOption,
SubjectAreaOption,
)
from django.conf import settings
from django.core.paginator import Paginator
from nltk.stem import PorterStemmer

from home.forms.search import SearchForm
from home.models.domain_model import DomainModel
from home.models.subject_area_taxonomy import SubjectAreaTaxonomy

from .base import GenericService
from .domain_fetcher import DomainFetcher
from .subject_area_fetcher import SubjectAreaFetcher


class SearchService(GenericService):
def __init__(self, form: SearchForm, page: str, items_per_page: int = 20):
domains: list[DomainOption] = DomainFetcher().fetch()
self.domain_model = DomainModel(domains)
subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
self.subject_area_taxonomy = SubjectAreaTaxonomy(subject_areas)
self.stemmer = PorterStemmer()
self.form = form
if self.form.is_bound:
Expand Down Expand Up @@ -77,16 +77,16 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
else "ascending"
)

domain = form_data.get("domain", "")
subject_area = form_data.get("domain", "")
tags = form_data.get("tags", "")
where_to_access = self._build_custom_property_filter(
"dc_where_to_access_dataset=", form_data.get("where_to_access", [])
)
entity_types = self._build_entity_types(form_data.get("entity_types", []))

filter_value = []
if domain:
filter_value.append(MultiSelectFilter("domains", [domain]))
if subject_area:
filter_value.append(MultiSelectFilter("domains", [subject_area]))
if where_to_access:
filter_value.append(MultiSelectFilter("customProperties", where_to_access))
if tags:
Expand Down Expand Up @@ -120,13 +120,15 @@ def _get_paginator(self, items_per_page: int) -> Paginator:

def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None:
if self.form.is_bound:
domain = self.form.cleaned_data.get("domain", "")
subject_area = self.form.cleaned_data.get("domain", "")
entity_types = self.form.cleaned_data.get("entity_types", [])
where_to_access = self.form.cleaned_data.get("where_to_access", [])
tags = self.form.cleaned_data.get("tags", [])
remove_filter_hrefs = {}
if domain:
remove_filter_hrefs["Subject area"] = self._generate_domain_clear_href()
if subject_area:
remove_filter_hrefs["Subject area"] = (
self._generate_subject_area_clear_href()
)
if entity_types:
entity_types_clear_href = {}
for entity_type in entity_types:
Expand Down Expand Up @@ -159,17 +161,17 @@ def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None:

return remove_filter_hrefs

def _generate_domain_clear_href(
def _generate_subject_area_clear_href(
self,
) -> dict[str, str]:
domain = self.form.cleaned_data.get("domain", "")
subject_area = self.form.cleaned_data.get("domain", "")

label = self.domain_model.get_label(domain)
label = self.subject_area_taxonomy.get_label(subject_area)

return {
label: (
self.form.encode_without_filter(
filter_name="domain", filter_value=domain
filter_name="domain", filter_value=subject_area
)
)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django.core.cache import cache

from .base import GenericService


class DomainFetcher(GenericService):
class SubjectAreaFetcher(GenericService):
"""
SubjectAreaFetcher implementation to fetch subject areas with the total number of
associated entities from the backend.
Expand All @@ -16,7 +16,7 @@ def __init__(self, filter_zero_entities: bool = True):
self.cache_timeout_seconds = 300
self.filter_zero_entities = filter_zero_entities

def fetch(self) -> list[DomainOption]:
def fetch(self) -> list[SubjectAreaOption]:
"""
Fetch a static list of options that is independent of the search query
and any applied filters. Values are cached for 5 minutes (300 seconds) to avoid
Expand All @@ -29,5 +29,5 @@ def fetch(self) -> list[DomainOption]:
cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds)

if self.filter_zero_entities:
result = [domain for domain in result if domain.total > 0]
result = [subject_area for subject_area in result if subject_area.total > 0]
return result
14 changes: 7 additions & 7 deletions home/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
PublicationDatasetEntityMapping,
TableEntityMapping,
)
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django.conf import settings
from django.http import Http404, HttpResponse, HttpResponseBadRequest
from django.shortcuts import render
Expand All @@ -31,10 +31,10 @@
DatabaseDetailsCsvFormatter,
DatasetDetailsCsvFormatter,
)
from home.service.domain_fetcher import DomainFetcher
from home.service.glossary import GlossaryService
from home.service.metadata_specification import MetadataSpecificationService
from home.service.search import SearchService
from home.service.subject_area_fetcher import SubjectAreaFetcher

type_details_map = {
TableEntityMapping.url_formatted: DatasetDetailsService,
Expand All @@ -49,10 +49,10 @@
@cache_control(max_age=300, private=True)
def home_view(request):
"""
Displays only domains that have entities tagged for display in the catalog.
Displays only subject areas that have entities tagged for display in the catalog.
"""
domains: list[DomainOption] = DomainFetcher().fetch()
context = {"domains": domains, "h1_value": "Home"}
subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
context = {"domains": subject_areas, "h1_value": "Home"}
return render(request, "home.html", context)


Expand Down Expand Up @@ -130,7 +130,7 @@ def metadata_specification_view(request):


def cookies_view(request):
valid_domains = [
valid_subject_areas = [
urlparse(origin).netloc for origin in settings.CSRF_TRUSTED_ORIGINS
]
referer = request.META.get("HTTP_REFERER")
Expand All @@ -139,7 +139,7 @@ def cookies_view(request):
referer_domain = urlparse(referer).netloc

# Validate this referer domain against declared valid domains
if referer_domain not in valid_domains:
if referer_domain not in valid_subject_areas:
referer = "/" # Set to home page if invalid

context = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,6 @@
from importlib.resources import files
from typing import Sequence

from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

from data_platform_catalogue.client.exceptions import (
AspectDoesNotExist,
ConnectivityError,
Expand Down Expand Up @@ -75,10 +48,36 @@
TableEntityMapping,
)
from data_platform_catalogue.search_types import (
DomainOption,
MultiSelectFilter,
SearchResponse,
SortOption,
SubjectAreaOption,
)
from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -234,7 +233,7 @@ def list_domains(
query: str = "*",
filters: Sequence[MultiSelectFilter] | None = None,
count: int = 1000,
) -> list[DomainOption]:
) -> list[SubjectAreaOption]:
"""
Returns a list of SubjectAreaOption objects
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@
TableEntityMapping,
)
from data_platform_catalogue.search_types import (
DomainOption,
FacetOption,
MultiSelectFilter,
SearchFacets,
SearchResponse,
SearchResult,
SortOption,
SubjectAreaOption,
)
from datahub.configuration.common import GraphError # pylint: disable=E0611
from datahub.ingestion.graph.client import DataHubGraph # pylint: disable=E0611
Expand Down Expand Up @@ -220,7 +220,7 @@ def list_domains(
query: str = "*",
filters: Sequence[MultiSelectFilter] | None = None,
count: int = 1000,
) -> list[DomainOption]:
) -> list[SubjectAreaOption]:
"""
Returns domains that can be used to filter the search results.
"""
Expand Down Expand Up @@ -272,8 +272,8 @@ def _map_result_types(

def _parse_list_domains(
self, list_domains_result: list[dict[str, Any]]
) -> list[DomainOption]:
list_domain_options: list[DomainOption] = []
) -> list[SubjectAreaOption]:
list_domain_options: list[SubjectAreaOption] = []

for domain in list_domains_result:
urn = domain.get("urn", "")
Expand All @@ -282,7 +282,7 @@ def _parse_list_domains(
entities = domain.get("entities", {})
total = entities.get("total", 0)

list_domain_options.append(DomainOption(urn, name, total))
list_domain_options.append(SubjectAreaOption(urn, name, total))
return list_domain_options

def _parse_dataset(
Expand Down
Loading
Loading