Skip to content

Commit

Permalink
Merge pull request #1852 from alphagov/add-reindex_organisations
Browse files Browse the repository at this point in the history
Add reindex_organisations to create an index of publishers in Solr
  • Loading branch information
kentsanggds authored Oct 15, 2024
2 parents d58eb4c + aef1837 commit dca6434
Showing 1 changed file with 67 additions and 0 deletions.
67 changes: 67 additions & 0 deletions ckanext/datagovuk/lib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sqlalchemy
from datetime import datetime, timedelta
import json
import pysolr

from functools import wraps

Expand Down Expand Up @@ -366,6 +367,72 @@ def reindex_recent(context):

package_index.commit()

###
# reindex_organisations only adds organisations that are not already in the
# Solr index; it never updates or removes existing entries.
# To update an organisation, delete it from Solr first and then re-run the command:
#
# curl -g "http://$CKAN_SOLR_URL/solr/ckan/update?commit=true" \
# -H 'Content-Type: application/json' \
# -d '{"delete":{"query":"site_id:dgu_organisations%20AND%20name:org-name"}}'
###
@datagovuk.command()
@pass_context
def reindex_organisations(context):
    '''
    Add active organisations that are missing from the Solr organisation index.

    Loads every active group of type ``organization`` from the database and
    adds to Solr (under ``site_id:dgu_organisations``) each one whose name is
    not already present there. Organisations already in the index are skipped,
    never updated. Finishes by printing the title and name of every indexed
    organisation.

    :param context: supplied by the ``pass_context`` decorator; unused here.
    '''
    print('=== Reindexing organisations')

    # Solr returns only 10 rows unless ``rows`` is given, so every query that
    # needs the full organisation list must ask for it explicitly.
    max_rows = 2000
    site_filter = "(site_id:dgu_organisations)"
    # Organisation extras that are copied into the Solr document.
    indexed_extra_keys = {
        'contact-email',
        'contact-name',
        'foi-name',
        'foi-email',
        'foi-web',
    }

    engine = sqlalchemy.create_engine(tk.config.get('sqlalchemy.url'))
    model.init_model(engine)

    orgs = model.Session.query(model.Group) \
        .filter(model.Group.type == 'organization') \
        .filter(model.Group.state == u'active') \
        .order_by(model.Group.name) \
        .all()

    solr = pysolr.Solr(os.getenv('CKAN_SOLR_URL'), always_commit=True, timeout=10)
    # Fail early if Solr is unreachable, before building any documents.
    solr.ping()

    # A set gives O(1) membership checks; ``rows`` makes sure all indexed
    # organisations are seen rather than just the first default page.
    existing_organisations = {
        r.get('name')
        for r in solr.search("*", fq=site_filter, rows=max_rows)
    }
    organisations = []
    counter = 0

    for org in orgs:
        if org.name in existing_organisations:
            continue

        counter += 1
        print(f'{counter} - adding organisation {org.name}')
        data = {
            "site_id": "dgu_organisations",
            "id": org.id,
            "title": org.title,
            "name": org.name,
        }

        org_extras = model.Session.query(model.GroupExtra) \
            .filter(model.GroupExtra.group_id == org.id) \
            .filter(model.GroupExtra.state == u'active') \
            .all()

        for extra in org_extras:
            if extra.key in indexed_extra_keys:
                data[f"extras_{extra.key}"] = extra.value

        organisations.append(data)

    # Send all new documents in a single request; skip the call when there is
    # nothing to add.
    if organisations:
        solr.add(organisations)

    results = solr.search("*", fq=site_filter, rows=max_rows)

    print(f"Retrieved {len(results)} results")

    for result in results:
        print(f"{result.get('title')} - {result.get('name')}")


def run_command(command):
try:
Expand Down

0 comments on commit dca6434

Please sign in to comment.