Skip to content

Commit

Permalink
Refactor Catalog Indexing (#2632)
Browse files Browse the repository at this point in the history
* Added test base

* Moved patches to separate packages

* Handle AT catalog multiplexing

* Skip portal catalog indexer

* reindex UID after sample creation

* Patch UID indexing

* Test fixture

* Added test for Analysis Indexing

* Workaround for partition IDs

* Added comment

* Added more tests

* Implemented Workflow Variables Patch

* Reduce noise in diff

* Added indexing test for batches

* README added

* Readme added

* Readme added

* Better comment

* Better comment

* Changelog updated

* Rely on API method to reindex new Sample objects

---------

Co-authored-by: Jordi Puiggené <jp@naralabs.com>
  • Loading branch information
ramonski and xispa authored Oct 31, 2024
1 parent 6121d66 commit a5fbfbd
Show file tree
Hide file tree
Showing 24 changed files with 965 additions and 231 deletions.
3 changes: 2 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ Changelog
2.6.0 (unreleased)
------------------

- #2632 Refactor Catalog Indexing
- #2630 Fix references from sample templates are not kept when partitioning
- #2634 Pin et-xmlfile to a Python 2 compatible version
- #2631 Fix for sending email attachment if filename contain spaces
- #2631 Fix for sending email attachment if filename contain spaces
- #2633 Fix DateTimeError when using API's to_DT and to_dt functions
- #2629 Fix default sticker template based on sample type is not rendered
- #2627 Skip workflow transition for temporary analyses
Expand Down
16 changes: 13 additions & 3 deletions src/bika/lims/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,10 +384,11 @@ def move_object(obj, destination, check_constraints=True):
return obj


def uncatalog_object(obj):
def uncatalog_object(obj, recursive=False):
"""Un-catalog the object from all catalogs
:param obj: object to un-catalog
:param recursive: recursively uncatalog all child objects
:type obj: ATContentType/DexterityContentType
"""
# un-catalog from registered catalogs
Expand All @@ -399,11 +400,16 @@ def uncatalog_object(obj):
url = "/".join(obj.getPhysicalPath()[2:])
uid_catalog.uncatalog_object(url)

if recursive:
for child in obj.objectValues():
uncatalog_object(child, recursive=recursive)

def catalog_object(obj):

def catalog_object(obj, recursive=False):
"""Re-catalog the object
:param obj: object to un-catalog
:param obj: object to catalog
:param recursive: recursively catalog all child objects
:type obj: ATContentType/DexterityContentType
"""
if is_at_content(obj):
Expand All @@ -416,6 +422,10 @@ def catalog_object(obj):
uc.catalog_object(obj, url)
obj.reindexObject()

if recursive:
for child in obj.objectValues():
catalog_object(child, recursive=recursive)


def delete(obj, check_permissions=True, suppress_events=False):
"""Deletes the given object
Expand Down
14 changes: 1 addition & 13 deletions src/bika/lims/utils/analysisrequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def create_analysisrequest(client, request, values, analyses=None,
# unmark the sample as temporary
api.unmark_temporary(ar)
# explicit reindexing after sample finalization
reindex(ar)
api.catalog_object(ar)
# notify object initialization (also creates a snapshot)
event.notify(ObjectInitializedEvent(ar))

Expand All @@ -169,18 +169,6 @@ def create_analysisrequest(client, request, values, analyses=None,
return ar


def reindex(obj, recursive=False):
"""Reindex the object
:param obj: The object to reindex
:param recursive: If true, all child objects are reindexed recursively
"""
obj.reindexObject()
if recursive:
for child in obj.objectValues():
reindex(child)


def receive_sample(sample, check_permission=False, date_received=None):
"""Receive the sample without transition
"""
Expand Down
11 changes: 8 additions & 3 deletions src/senaite/core/catalog/catalog_multiplex_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ def is_global_auditlog_enabled(self):
return setup.getEnableGlobalAuditlog()

def get_catalogs_for(self, obj):
catalogs = getattr(obj, "_catalogs", [])
"""Get a list of catalog IDs for the given object
"""
# get a list of catalog IDs that are mapped to the object
catalogs = list(map(lambda x: x.id, api.get_catalogs_for(obj)))

for rc in REQUIRED_CATALOGS:
if rc in catalogs:
continue
Expand All @@ -61,10 +65,11 @@ def get_catalogs_for(self, obj):
def supports_multi_catalogs(self, obj):
"""Check if the Multi Catalog Behavior is enabled
"""
if IMultiCatalogBehavior(obj, None) is None:
return False
if api.is_temporary(obj):
return False
if api.is_dexterity_content(obj) and \
IMultiCatalogBehavior(obj, None) is None:
return False
return True

def index(self, obj, attributes=None):
Expand Down
6 changes: 5 additions & 1 deletion src/senaite/core/idserver/idserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ def get_partition_count(context, default=0):
if not parent:
return default

return len(parent.getDescendants())
# XXX: we need to count one up because the new partition only shows up in
# parent.getDescendants() *after* it has been renamed, because
# temporary objects don't get indexed!
# https://github.com/senaite/senaite.core/pull/2632
return len(parent.getDescendants()) + 1


def get_secondary_count(context, default=0):
Expand Down
13 changes: 0 additions & 13 deletions src/senaite/core/patches/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,3 @@
#
# Copyright 2018-2024 by it's authors.
# Some rights reserved, see README and LICENSE.

from bika.lims import api
from Products.Archetypes import utils


def isFactoryContained(obj):
"""Are we inside the portal_factory?
"""
return api.is_temporary(obj)


# https://pypi.org/project/collective.monkeypatcher/#patching-module-level-functions
utils.isFactoryContained = isFactoryContained
92 changes: 92 additions & 0 deletions src/senaite/core/patches/archetypes/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Archetypes Patches

This package contains patches for Archetype based content types.

## Catalog Multiplex

The module `catalog_multiplex` contains patches for the class
`Products.Archetpyes.CatalogMultiplex.CatalogMultiplex`, which is a mixin for
`BaseContent` and controls how to index, unindex, reindex these content and is
used when e.g. `obj.reindexObject` is called.

### Patches

The following methods are patched:

- `indexObject`
- `unindexObject`
- `reindexObject`

### Reason

The patches ensure that temporary objects are not indexed and delegate the
operation to the respective method of the catalog itself.

Due to the fact that SENAITE catalogs inherit from `Products.CMFPlone.CatalogTool.CatalogTool`,
which is a subclass of `Products.CMFCore.CatalogTool.CatalogTool`, this operation uses the
`IndexQueue` defined in `Products.CMFCore.indexing.IndexQueue` to optimize indexing.

### Notes

The index queue always looks up all registered `IIndexQueueProcessor` utilities
to further delegate the operation.

Since the `PortalCatalogProcessor` utility is registered there as well, a
patching is required to avoid indexing of, e.g. Samples or Analyses there as
they should be only indexed in their primary catalog, e.g.
`seanite_catalog_sample` or `senaite_catalog_analysis`.

Please see `senaite.core.patches.cmfcore.portal_catalog_processor` for details.

Furthermore, changes in `senaite.core.catalog.catalog_multiplex_processor.CatalogMultiplexProcessor`
were required to handle AT based contens as well.

Please see https://github.com/senaite/senaite.core/pull/2632 for details.

💡 It might make sense to define for each catalog its own `IIndexQueueProcessor`.
A simple check by content type would could decide if a content should be indexed or not.


## UID Catalog indexing

The module `referencable` contains patches for the class `Products.Archetypes.Referencable.Referencable`,
which is a mixin for `BaseObject` and controls AT native referencable behavior
(not used) and the indexing in the UID Catalog (used and needed for UID
references and more).

### Patches

The following methods are patched:

- `_catalogUID_`
- `uncatalogUID`

### Reason

The patches ensure that temporary objects are not indexed.

### Notes

As soon as we have migrated all contents to Dexterity, we should provide a
custom `senaite_catalog_uid` to keep track of the UIDs and maybe references.


## Base Object

The module `base_objects` contains patches for the class `Products.Archetypes.BaseObject.BaseObject`,
which is the base class for our AT based contents.

### Patches

The following methods are patched:

- `getLabels`
- `isTemporary`

### Reason

Provide a similar methods for AT contents as for DX contents.

**getLabels**: Get SENAITE labels (dynamically extended fields)

**isTemporary**: Checks if an object contains a temporary ID to avoid further indexing/processing
33 changes: 33 additions & 0 deletions src/senaite/core/patches/archetypes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
#
# This file is part of SENAITE.CORE.
#
# SENAITE.CORE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright 2018-2024 by it's authors.
# Some rights reserved, see README and LICENSE.


from bika.lims import api
from Products.Archetypes import utils


def isFactoryContained(obj):
"""Are we inside the portal_factory?
"""
return api.is_temporary(obj)


# https://pypi.org/project/collective.monkeypatcher/#patching-module-level-functions
utils.isFactoryContained = isFactoryContained
File renamed without changes.
116 changes: 116 additions & 0 deletions src/senaite/core/patches/archetypes/catalog_multiplex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
#
# This file is part of SENAITE.CORE.
#
# SENAITE.CORE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright 2018-2024 by it's authors.
# Some rights reserved, see README and LICENSE.

from bika.lims import api
from Products.Archetypes.Referenceable import Referenceable
from Products.Archetypes.utils import shasattr
from senaite.core.catalog import AUDITLOG_CATALOG


def is_auditlog_enabled():
setup = api.get_senaite_setup()
if not setup:
return False
return setup.getEnableGlobalAuditlog()


def indexObject(self):
"""Handle indexing for AT based objects
"""
# Never index temporary AT objects
if api.is_temporary(self):
return

# get all registered catalogs
catalogs = api.get_catalogs_for(self)

for catalog in catalogs:
# skip auditlog_catalog if global auditlogging is deactivated
if catalog.id == AUDITLOG_CATALOG and not is_auditlog_enabled():
continue
# always use catalog tool queuing system
catalog.indexObject(self)


def unindexObject(self):
"""Handle unindexing for AT based objects
"""
# Never unindex temporary AT objects
if api.is_temporary(self):
return

# get all registered catalogs
catalogs = api.get_catalogs_for(self)

for catalog in catalogs:
# skip auditlog_catalog if global auditlogging is deactivated
if catalog.id == AUDITLOG_CATALOG and not is_auditlog_enabled():
continue
# always use catalog tool queuing system
catalog.unindexObject(self)


def reindexObject(self, idxs=None):
"""Reindex all AT based contents with the catalog queuing system
"""
# Never reindex temporary AT objects
if api.is_temporary(self):
return

if idxs is None:
idxs = []

# Copy (w/o knowig if this is required of not x_X)
if idxs == [] and shasattr(self, "notifyModified"):
# Archetypes default setup has this defined in ExtensibleMetadata
# mixin. note: this refreshes the 'etag ' too.
self.notifyModified()
self.http__refreshEtag()
# /Paste

catalogs = api.get_catalogs_for(self)

for catalog in catalogs:
# skip auditlog_catalog if global auditlogging is deactivated
if catalog.id == AUDITLOG_CATALOG and not is_auditlog_enabled():
continue
# We want the intersection of the catalogs idxs
# and the incoming list.
lst = idxs
indexes = catalog.indexes()
if idxs:
lst = [i for i in idxs if i in indexes]
# use catalog tool queuing system
catalog.reindexObject(self, idxs=lst)

# Copy (w/o knowig if this is required of not x_X)
#
# We only make this call if idxs is not passed.
#
# manage_afterAdd/manage_beforeDelete from Referenceable take
# care of most of the issues, but some places still expect to
# call reindexObject and have the uid_catalog updated.
# TODO: fix this so we can remove the following lines.
if not idxs:
if isinstance(self, Referenceable):
isCopy = getattr(self, '_v_is_cp', None)
if isCopy is None:
self._catalogUID(self)
# /Paste
Loading

0 comments on commit a5fbfbd

Please sign in to comment.