From 584bcd828ccf6569371563c51f9c0d4660c2a917 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Wed, 22 Dec 2021 15:25:35 -0500 Subject: [PATCH 01/19] new: update ckan to dev-2.9 branch to pull into solr8 updates --- ckan/requirements.in | 2 +- ckan/requirements.txt | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/ckan/requirements.in b/ckan/requirements.in index dc857fd9..fd453af3 100644 --- a/ckan/requirements.in +++ b/ckan/requirements.in @@ -1,6 +1,6 @@ # CKAN requirements and extensions -ckan @ git+https://github.com/ckan/ckan.git@ckan-2.9.4 +ckan @ git+https://github.com/ckan/ckan.git@dev-v2.9 -e git+https://github.com/GSA/ckanext-datagovcatalog.git@main#egg=ckanext-datagovcatalog -e git+https://github.com/GSA/ckanext-datagovtheme.git@main#egg=ckanext-datagovtheme -e git+https://github.com/GSA/ckanext-datajson.git@main#egg=ckanext-datajson diff --git a/ckan/requirements.txt b/ckan/requirements.txt index b188663a..8ec16fb9 100644 --- a/ckan/requirements.txt +++ b/ckan/requirements.txt @@ -6,20 +6,20 @@ boto==2.49.0 certifi==2021.10.8 cffi==1.15.0 chardet==3.0.4 -ckan @ git+https://github.com/ckan/ckan.git@6731c5a821a6a5f4bdaa20f4e793e0b6ba44f823 +ckan @ git+https://github.com/ckan/ckan.git@ef1432fa4177edbd0a2f457bad363a33b6b61344 -e git+https://github.com/GSA/ckanext-datagovcatalog.git@64e65702ae1eb5e46d9f37139dc4044c0f253526#egg=ckanext_datagovcatalog --e git+https://github.com/GSA/ckanext-datagovtheme.git@fa53b7c836d4cc3f34b0c5f156b04f9bacb8a67e#egg=ckanext_datagovtheme +-e git+https://github.com/GSA/ckanext-datagovtheme.git@d20b0f696cc16251dc94ee811f3f36cbde6673dc#egg=ckanext_datagovtheme -e git+https://github.com/GSA/ckanext-datajson.git@a5d8c9458a7efe955c31b90eeac1e7797881d014#egg=ckanext_datajson ckanext-dcat @ git+https://github.com/ckan/ckanext-dcat@2d2c8a894bea8c97b0c8544465094f9979ac516b ckanext-envvars @ git+https://github.com/GSA/ckanext-envvars.git@33f7e190ab332244cb961a425e09af592d9b647b -e git+https://github.com/GSA/ckanext-geodatagov.git@af6378074fcbc2705e7e33960d5ddd2c8e46ed4c#egg=ckanext_geodatagov ckanext-googleanalyticsbasic @ git+https://github.com/GSA/ckanext-googleanalyticsbasic.git@c6a425d5e14d658c0fa3661fdc4423162161c3f4 --e git+https://github.com/ckan/ckanext-harvest.git@9d5679f0461f5aac05b7f800e11b6a62afb7feeb#egg=ckanext_harvest +-e git+https://github.com/ckan/ckanext-harvest.git@d84d847b09f28ab97bf1ca0baa651fdc05693d03#egg=ckanext_harvest ckanext-saml2auth @ git+https://github.com/keitaroinc/ckanext-saml2auth.git@7412ff7aba3d215f95a08f99216410e72e60c5bc -e git+https://github.com/gsa/ckanext-spatial.git@3828c6e7efe7c4b5cef02f4e7163339c7b5c5710#egg=ckanext_spatial ckantoolkit==0.0.3 click==7.1.2 -cryptography==35.0.0 +cryptography==36.0.1 defusedxml==0.7.1 Deprecated==1.2.13 distro==1.6.0 @@ -33,28 +33,28 @@ flask-multistatic==1.0 future==0.18.2 GeoAlchemy2==0.5.0 geomet==0.3.0 -gevent==21.8.0 +gevent==21.12.0 google-compute-engine==2.8.13 greenlet==1.1.2 gunicorn==20.1.0 html5lib==1.1 idna==2.10 importlib-resources==5.4.0 -isodate==0.6.0 +isodate==0.6.1 itsdangerous==2.0.1 Jinja2==2.11.3 json-table-schema==0.2.1 jsonschema==2.4.0 LEPL==5.1.3 -lxml==4.6.4 -Mako==1.1.5 +lxml==4.7.1 +Mako==1.1.6 Markdown==3.1.1 MarkupSafe==2.0.1 messytables==0.15.2 newrelic==7.2.4.171 nose==1.3.7 OWSLib==0.18.0 -packaging==21.2 +packaging==21.3 passlib==1.7.3 PasteDeploy==2.0.1 pathtools==0.1.2 @@ -65,7 +65,7 @@ psycopg2==2.8.6 pycparser==2.21 PyJWT==1.7.1 pyOpenSSL==20.0.1 -pyparsing==2.4.7 +pyparsing==3.0.6 pyproj==2.6.1 pysaml2==7.0.1 pysolr==3.6.0 @@ -78,7 +78,7 @@ PyUtilib==5.7.1 PyYAML==5.4 PyZ3950 @ git+https://github.com/danizen/PyZ3950@6d44a4ab85c8bda3a7542c2c9efdfad46c830219 rdflib==4.2.2 -redis==4.0.0 +redis==4.0.2 repoze.lru==0.7 repoze.who==2.3 requests==2.25.0 @@ -100,7 +100,7 @@ WebOb==1.8.7 Werkzeug==1.0.0 wrapt==1.13.3 xlrd==2.0.1 -xmlschema==1.8.2 +xmlschema==1.9.1 zipp==3.6.0 zope.event==4.5.0 zope.interface==5.4.0 From 0b4823248e1eb3cc6daae5e8303eeb64a233d362 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Wed, 22 Dec 2021 16:02:55 -0500 Subject: [PATCH 02/19] tests: update test to look for 2.9.5b --- e2e/cypress/integration/ckan_extensions.spec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/e2e/cypress/integration/ckan_extensions.spec.js b/e2e/cypress/integration/ckan_extensions.spec.js index 41baca29..27cf31ea 100644 --- a/e2e/cypress/integration/ckan_extensions.spec.js +++ b/e2e/cypress/integration/ckan_extensions.spec.js @@ -3,7 +3,7 @@ describe('CKAN Extensions', () => { it('Uses CKAN 2.9', () => { cy.request('/api/action/status_show').should((response) => { expect(response.body).to.have.property('success', true); - expect(response.body.result).to.have.property('ckan_version', '2.9.4'); + expect(response.body.result).to.have.property('ckan_version', '2.9.5b'); }); }) @@ -43,4 +43,4 @@ describe('CKAN Extensions', () => { // expect(installed_extensions).to.include('qa'); }); }) -}) \ No newline at end of file +}) From 19a69557788a5cd1f69f735e577bcb9f9359a30e Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Wed, 22 Dec 2021 16:03:49 -0500 Subject: [PATCH 03/19] new: update to solr 8 --- solr/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/Dockerfile b/solr/Dockerfile index 584263d8..9865911c 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -1,4 +1,4 @@ -FROM solr:6.6.6-slim +FROM solr:8.11-slim MAINTAINER Open Knowledge # Enviroment From cc0d403df1c6bb07aaf3d65104d1ef3d023c0fc2 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Wed, 29 Dec 2021 09:41:29 -0500 Subject: [PATCH 04/19] new: install curl in Solr image to interact with API, related to https://github.com/GSA/datagov-brokerpak-solr/pull/29 --- docker-compose.yml | 2 +- solr/Dockerfile | 7 ++++++- solr/service-config.json | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7c3fbd79..836fcc9c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,7 +24,7 @@ services: - pg_data:/var/lib/postgresql/data solr: - image: ghcr.io/gsa/catalog.data.gov.solr + image: ghcr.io/gsa/catalog.data.gov.solr:8-curl ports: - "8983:8983" volumes: diff --git a/solr/Dockerfile b/solr/Dockerfile index 9865911c..fd91e03c 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -8,9 +8,13 @@ ENV CKAN_VERSION dev-v2.9 # Set user to root for initial configuration USER root +# Add curl to access API routes +RUN apt-get update && apt-get install -y curl + # Create Directories RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/conf RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data +RUN mkdir -p /var/solr/data/$SOLR_CORE/ # Adding Files ADD solrconfig.xml \ @@ -20,7 +24,8 @@ https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/ https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/stopwords.txt \ https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/protwords.txt \ https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml \ -/opt/solr/server/solr/$SOLR_CORE/conf/ +/var/solr/data/$SOLR_CORE/ +#/opt/solr/server/solr/$SOLR_CORE/conf/ # Fix Issue https://github.com/GSA/datagov-deploy/issues/3285 diff --git a/solr/service-config.json b/solr/service-config.json index 1c595ada..9e0c61fd 100644 --- a/solr/service-config.json +++ b/solr/service-config.json @@ -1,4 +1,4 @@ { "solrImageRepo": "ghcr.io/gsa/catalog.data.gov.solr", - "solrImageTag": "latest" -} \ No newline at end of file + "solrImageTag": "8-curl" +} From 97695cab135846ff8d07190ad47cd327246d93a2 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Thu, 30 Dec 2021 15:42:44 -0500 Subject: [PATCH 05/19] new: Got Solr 8 Image functional --- solr/Dockerfile | 53 +- solr/currency.xml | 67 -- solr/elevate.xml | 42 -- solr/protwords.txt | 21 - solr/schema.xml | 208 ------ solr/solrconfig.xml | 1638 ++++++++++++++++++++++++++++++++++--------- solr/stopwords.txt | 14 - solr/synonyms.txt | 29 - 8 files changed, 1326 insertions(+), 746 deletions(-) delete mode 100644 solr/currency.xml delete mode 100644 solr/elevate.xml delete mode 100644 solr/protwords.txt delete mode 100644 solr/schema.xml delete mode 100644 solr/stopwords.txt delete mode 100644 solr/synonyms.txt diff --git a/solr/Dockerfile b/solr/Dockerfile index fd91e03c..d5f66180 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -1,32 +1,37 @@ FROM solr:8.11-slim MAINTAINER Open Knowledge -# Enviroment + +######################## +## CKAN Specific Changes +## Pulled from https://github.com/ckan/ckan-solr/blob/master/solr-8/Dockerfile +######################## + +EXPOSE 8983 + ENV SOLR_CORE ckan -ENV CKAN_VERSION dev-v2.9 +ARG CKAN_BRANCH="dev-v2.9" + +ENV SOLR_CONFIG_DIR="/opt/solr/server/solr/configsets" +ENV SOLR_SCHEMA_FILE="$SOLR_CONFIG_DIR/ckan/conf/managed-schema" # Set user to root for initial configuration USER root -# Add curl to access API routes -RUN apt-get update && apt-get install -y curl +# Create a CKAN configset by copying the default one +RUN cp -R $SOLR_CONFIG_DIR/_default $SOLR_CONFIG_DIR/ckan -# Create Directories -RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/conf -RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data -RUN mkdir -p /var/solr/data/$SOLR_CORE/ +# Update the schema +ADD https://raw.githubusercontent.com/ckan/ckan/$CKAN_BRANCH/ckan/config/solr/schema.xml $SOLR_SCHEMA_FILE +RUN chmod 644 $SOLR_SCHEMA_FILE -# Adding Files -ADD solrconfig.xml \ -schema.xml \ -https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/currency.xml \ -https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/synonyms.txt \ -https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/stopwords.txt \ -https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/basic_configs/conf/protwords.txt \ -https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.6/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml \ -/var/solr/data/$SOLR_CORE/ -#/opt/solr/server/solr/$SOLR_CORE/conf/ +####################### +## GSA Specific Changes (also need root user) +####################### + +# Add curl to access API routes +RUN apt-get update && apt-get install -y curl # Fix Issue https://github.com/GSA/datagov-deploy/issues/3285 # Update the angularjs library files @@ -79,12 +84,16 @@ RUN perl -0777 -i -pe 's/ \ /opt/solr/server/etc/webdefault.xml -# Create Core.properties -RUN echo name=$SOLR_CORE > /opt/solr/server/solr/$SOLR_CORE/core.properties +# Fix from https://stackoverflow.com/a/31320697 +ADD solrconfig.xml $SOLR_CONFIG_DIR/ckan/conf/solrconfig.xml +RUN sed -i "s/text<\/defaultSearchField>/text<\/df>/" $SOLR_SCHEMA_FILE +RUN sed -i "s///" $SOLR_SCHEMA_FILE # Giving ownership to user 'solr' -RUN mkdir /opt/solr/server/solr/$SOLR_CORE/data/index +RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data/index RUN chown -R $SOLR_USER:$SOLR_USER /opt/solr/server/solr/ - +# # Set user to 'solr' for to comply with solr security USER solr + +CMD ["sh", "-c", "solr-precreate ckan $SOLR_CONFIG_DIR/ckan"] diff --git a/solr/currency.xml b/solr/currency.xml deleted file mode 100644 index 3a9c58af..00000000 --- a/solr/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/elevate.xml b/solr/elevate.xml deleted file mode 100644 index 2c09ebed..00000000 --- a/solr/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/protwords.txt b/solr/protwords.txt deleted file mode 100644 index 1dfc0abe..00000000 --- a/solr/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff --git a/solr/schema.xml b/solr/schema.xml deleted file mode 100644 index 1929d6dc..00000000 --- a/solr/schema.xml +++ /dev/null @@ -1,208 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -index_id -text - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/solr/solrconfig.xml b/solr/solrconfig.xml index 9ac620c2..f76fca4a 100644 --- a/solr/solrconfig.xml +++ b/solr/solrconfig.xml @@ -1,343 +1,1295 @@ - - - - - - 6.0.0 - - - - - - - - - - - ${solr.data.dir:} - - - - - - - ${solr.lock.type:native} - - - - - - - - ${solr.ulog.dir:} - ${solr.ulog.numVersionBuckets:65536} - - - - ${solr.autoCommit.maxTime:15000} - false - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - 1024 - - - - - true - 20 - 200 - - - - - - - - - - false - 2 - - - - - - - - - - - - - - explicit - 10 - - - - - - - - explicit - json - true - - - - - - - - explicit - - - - - - - - _text_ - - - - - - - - add-unknown-fields-to-the-schema - - - - - - - - true - ignored_ - _text_ - - - - - - - - - - - explicit - true - - - - - - - text_general - - - default - _text_ - solr.DirectSolrSpellChecker - internal - 0.5 - 2 - 1 - 5 - 4 - 0.01 - - - - - - - - default - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - - spellcheck - - - - - - - - - - true - - - tvComponent - - - - - - - - - - true - false - - - - terms - - - - - - - string - elevate.xml - - - - - - - explicit - - - elevator - - - - - - - - - - - 100 - - - - - - 70 - 0.5 - [-\w ,/\n\"']{20,200} - - - - - - ]]> - ]]> - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - - 10 - .,!? - - - - - - WORD - en - US - - - - - - - - - - - - - - - - [^\w-\.] - _ - - - - - - - yyyy-MM-dd'T'HH:mm:ss.SSSZ - yyyy-MM-dd'T'HH:mm:ss,SSSZ - yyyy-MM-dd'T'HH:mm:ss.SSS - yyyy-MM-dd'T'HH:mm:ss,SSS - yyyy-MM-dd'T'HH:mm:ssZ - yyyy-MM-dd'T'HH:mm:ss - yyyy-MM-dd'T'HH:mmZ - yyyy-MM-dd'T'HH:mm - yyyy-MM-dd HH:mm:ss.SSSZ - yyyy-MM-dd HH:mm:ss,SSSZ - yyyy-MM-dd HH:mm:ss.SSS - yyyy-MM-dd HH:mm:ss,SSS - yyyy-MM-dd HH:mm:ssZ - yyyy-MM-dd HH:mm:ss - yyyy-MM-dd HH:mmZ - yyyy-MM-dd HH:mm - yyyy-MM-dd - - - - - - - text/plain; charset=UTF-8 - - - - ${velocity.template.base.dir:} - ${velocity.solr.resource.loader.enabled:true} - ${velocity.params.resource.loader.enabled:false} - - - - 5 - - - + + + + + + + + + 8.11.1 + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + ${solr.max.booleanClauses:1024} + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + + + + + + explicit + json + true + + + + + + _text_ + + + + + + + + + text_general + + + + + + default + _text_ + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + + + + + + + default + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + true + false + + + terms + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + [^\w-\.] + _ + + + + + + + yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z + yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z + yyyy-MM-dd HH:mm[:ss[.SSS]][z + yyyy-MM-dd HH:mm[:ss[,SSS]][z + [EEE, ]dd MMM yyyy HH:mm[:ss] z + EEEE, dd-MMM-yy HH:mm:ss z + EEE MMM ppd HH:mm:ss [z ]yyyy + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + + + + + + diff --git a/solr/stopwords.txt b/solr/stopwords.txt deleted file mode 100644 index ae1e83ee..00000000 --- a/solr/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/solr/synonyms.txt b/solr/synonyms.txt deleted file mode 100644 index eab4ee87..00000000 --- a/solr/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - From 49dcb564e0a4682329e692510e9a06ae42909ed4 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Thu, 30 Dec 2021 15:46:09 -0500 Subject: [PATCH 06/19] docs: update reference to fixes --- solr/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/Dockerfile b/solr/Dockerfile index d5f66180..568dcdc5 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -84,8 +84,10 @@ RUN perl -0777 -i -pe 's/ \ /opt/solr/server/etc/webdefault.xml -# Fix from https://stackoverflow.com/a/31320697 +# Incorporate Fix from https://stackoverflow.com/a/31320697 ADD solrconfig.xml $SOLR_CONFIG_DIR/ckan/conf/solrconfig.xml + +# Fim from https://github.com/ckan/ckan/issues/5585#issuecomment-953586246 RUN sed -i "s/text<\/defaultSearchField>/text<\/df>/" $SOLR_SCHEMA_FILE RUN sed -i "s///" $SOLR_SCHEMA_FILE From fdc139bd8147528cbb6f6d05ab5c24f54521635f Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 11:09:51 -0500 Subject: [PATCH 07/19] new: add custom prerun script to tackle solr8 connection check; inherit the rest from the original prerun script --- ckan/Dockerfile | 5 +++- ckan/setup/GSA_prerun.py | 54 ++++++++++++++++++++++++++++++++++++++++ ckan/setup/ckan_setup.sh | 5 ++-- 3 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 ckan/setup/GSA_prerun.py diff --git a/ckan/Dockerfile b/ckan/Dockerfile index 927bb974..e57bde52 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -26,6 +26,9 @@ COPY docker-entrypoint.d/* /docker-entrypoint.d/ COPY setup/gunicorn.conf.py ${APP_DIR}/ COPY setup/server_start.sh ${APP_DIR}/ +# Custom prerun script for Solr 8 +COPY setup/GSA_prerun.py ${APP_DIR}/ + COPY saml2 ${APP_DIR}/saml2 # COPY the ini test file to the container @@ -39,4 +42,4 @@ RUN ln -s /usr/bin/python3 /usr/bin/python # harvests, we need to setup a cron for the run command COPY setup/harvest-check-cron /etc/crontabs/root -# RUN sudo -u ckan -EH pip3 install git+https://github.com/nickumia-reisys/werkzeug@e1f6527604ab30e4b46b5430a5fb97e7a7055cd7#egg=werkzeug \ No newline at end of file +# RUN sudo -u ckan -EH pip3 install git+https://github.com/nickumia-reisys/werkzeug@e1f6527604ab30e4b46b5430a5fb97e7a7055cd7#egg=werkzeug diff --git a/ckan/setup/GSA_prerun.py b/ckan/setup/GSA_prerun.py new file mode 100644 index 00000000..0d0c985c --- /dev/null +++ b/ckan/setup/GSA_prerun.py @@ -0,0 +1,54 @@ +import os +import sys +import time +try: + from urllib.request import urlopen + from urllib.error import URLError +except ImportError: + from urllib2 import urlopen + from urllib2 import URLError + +import prerun as pr + +RETRY = 5 + + +def check_solr_connection(retry=None): + if retry is None: + retry = RETRY + elif retry == 0: + print("[prerun] Giving up after 5 tries...") + sys.exit(1) + + url = os.environ.get("CKAN_SOLR_URL", "") + search_url = "{url}/select/?q=*&wt=json".format(url=url) + + try: + connection = urlopen(search_url) + except URLError as e: + print(str(e)) + print("[prerun] Unable to connect to solr, waiting...") + time.sleep(10) + check_solr_connection(retry=retry - 1) + else: + try: + pythonified = str(connection.read()).replace('true', 'True') + eval(pythonified) + except TypeError: + pass + + +if __name__ == "__main__": + + maintenance = os.environ.get("MAINTENANCE_MODE", "").lower() == "true" + + if maintenance: + print("[prerun] Maintenance mode, skipping setup...") + else: + pr.check_main_db_connection() + pr.init_db() + pr.update_plugins() + pr.check_datastore_db_connection() + pr.init_datastore_db() + check_solr_connection() + pr.create_sysadmin() diff --git a/ckan/setup/ckan_setup.sh b/ckan/setup/ckan_setup.sh index 5f287c96..e139c88a 100755 --- a/ckan/setup/ckan_setup.sh +++ b/ckan/setup/ckan_setup.sh @@ -51,7 +51,8 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \ "ckan.redis.url = $TEST_CKAN_REDIS_URL" # Run the prerun script to init CKAN and create the default admin user -python3 prerun.py +# python3 prerun.py +python GSA_prerun.py # Run any startup scripts provided by images extending this one if [[ -d "/docker-entrypoint.d" ]] @@ -66,4 +67,4 @@ then done fi -exec /app/ckan/setup/server_start.sh \ No newline at end of file +exec /app/ckan/setup/server_start.sh From d4472f12ae5df984b8feb17f2e57df0bad58d583 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 16:59:52 -0500 Subject: [PATCH 08/19] new: add CKAN_SOLR_BASE_URL, CKAN_SOLR_USER, CKAN_SOLR_PASSWORD to local env --- .env | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.env b/.env index 4acb2891..a8ac2d49 100644 --- a/.env +++ b/.env @@ -27,6 +27,10 @@ TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_te CKAN_SOLR_URL=http://solr:8983/solr/ckan CKAN_REDIS_URL=redis://redis:6379/1 +CKAN_SOLR_BASE_URL=http://solr:8983 +CKAN_SOLR_USER=admin +CKAN_SOLR_PASSWORD=pass + TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan TEST_CKAN_REDIS_URL=redis://redis:6379/1 @@ -142,4 +146,4 @@ CKANEXT__SAML2AUTH__REQUESTED_AUTHN_CONTEXT=http://idmanagement.gov/ns/assurance CKANEXT__SAML2AUTH__REQUESTED_AUTHN_CONTEXT_COMPARISON=exact # Avoid double package_show call to add tracking info -CKANEXT__DATAGOVCATALOG__ADD_PACKAGES_TRACKING_INFO=false \ No newline at end of file +CKANEXT__DATAGOVCATALOG__ADD_PACKAGES_TRACKING_INFO=false From 956093eb2875b0cbe05cad35f1c4ca2637e64fbc Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 17:03:26 -0500 Subject: [PATCH 09/19] reintroduce: ckan core config files for collection creation --- ckan/setup/solr/protwords.txt | 21 +++++++++++++++ {solr => ckan/setup/solr}/solrconfig.xml | 0 ckan/setup/solr/stopwords.txt | 14 ++++++++++ ckan/setup/solr/synonyms.txt | 29 +++++++++++++++++++++ solr/Dockerfile | 33 +----------------------- 5 files changed, 65 insertions(+), 32 deletions(-) create mode 100644 ckan/setup/solr/protwords.txt rename {solr => ckan/setup/solr}/solrconfig.xml (100%) create mode 100644 ckan/setup/solr/stopwords.txt create mode 100644 ckan/setup/solr/synonyms.txt diff --git a/ckan/setup/solr/protwords.txt b/ckan/setup/solr/protwords.txt new file mode 100644 index 00000000..5a32e503 --- /dev/null +++ b/ckan/setup/solr/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/solrconfig.xml b/ckan/setup/solr/solrconfig.xml similarity index 100% rename from solr/solrconfig.xml rename to ckan/setup/solr/solrconfig.xml diff --git a/ckan/setup/solr/stopwords.txt b/ckan/setup/solr/stopwords.txt new file mode 100644 index 00000000..25b47f6a --- /dev/null +++ b/ckan/setup/solr/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ckan/setup/solr/synonyms.txt b/ckan/setup/solr/synonyms.txt new file mode 100644 index 00000000..d1ed0dc7 --- /dev/null +++ b/ckan/setup/solr/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/Dockerfile b/solr/Dockerfile index 568dcdc5..5e28028f 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -2,30 +2,6 @@ FROM solr:8.11-slim MAINTAINER Open Knowledge -######################## -## CKAN Specific Changes -## Pulled from https://github.com/ckan/ckan-solr/blob/master/solr-8/Dockerfile -######################## - -EXPOSE 8983 - -ENV SOLR_CORE ckan -ARG CKAN_BRANCH="dev-v2.9" - -ENV SOLR_CONFIG_DIR="/opt/solr/server/solr/configsets" -ENV SOLR_SCHEMA_FILE="$SOLR_CONFIG_DIR/ckan/conf/managed-schema" - -# Set user to root for initial configuration -USER root - -# Create a CKAN configset by copying the default one -RUN cp -R $SOLR_CONFIG_DIR/_default $SOLR_CONFIG_DIR/ckan - -# Update the schema -ADD https://raw.githubusercontent.com/ckan/ckan/$CKAN_BRANCH/ckan/config/solr/schema.xml $SOLR_SCHEMA_FILE -RUN chmod 644 $SOLR_SCHEMA_FILE - - ####################### ## GSA Specific Changes (also need root user) ####################### @@ -84,13 +60,6 @@ RUN perl -0777 -i -pe 's/ \ /opt/solr/server/etc/webdefault.xml -# Incorporate Fix from https://stackoverflow.com/a/31320697 -ADD solrconfig.xml $SOLR_CONFIG_DIR/ckan/conf/solrconfig.xml - -# Fim from https://github.com/ckan/ckan/issues/5585#issuecomment-953586246 -RUN sed -i "s/text<\/defaultSearchField>/text<\/df>/" $SOLR_SCHEMA_FILE -RUN sed -i "s///" $SOLR_SCHEMA_FILE - # Giving ownership to user 'solr' RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data/index RUN chown -R $SOLR_USER:$SOLR_USER /opt/solr/server/solr/ @@ -98,4 +67,4 @@ RUN chown -R $SOLR_USER:$SOLR_USER /opt/solr/server/solr/ # Set user to 'solr' for to comply with solr security USER solr -CMD ["sh", "-c", "solr-precreate ckan $SOLR_CONFIG_DIR/ckan"] +# CMD ["sh", "-c", "solr-precreate ckan $SOLR_CONFIG_DIR/ckan"] From da26a8614a55b5a5697b8cd871367bed184b23a7 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 17:05:31 -0500 Subject: [PATCH 10/19] new: move solr ckan core creation to ckan app --- {solr => ckan/setup}/migrate-solrcloud-schema.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) rename {solr => ckan/setup}/migrate-solrcloud-schema.sh (72%) diff --git a/solr/migrate-solrcloud-schema.sh b/ckan/setup/migrate-solrcloud-schema.sh similarity index 72% rename from solr/migrate-solrcloud-schema.sh rename to ckan/setup/migrate-solrcloud-schema.sh index 91bec5e2..e74732ab 100755 --- a/solr/migrate-solrcloud-schema.sh +++ b/ckan/setup/migrate-solrcloud-schema.sh @@ -23,9 +23,16 @@ if ! (curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PA --data-urlencode action=list \ --data-urlencode wt=json | grep -q $COLLECTION_NAME); then + CKAN_BRANCH="dev-v2.9" + curl https://raw.githubusercontent.com/ckan/ckan/$CKAN_BRANCH/ckan/config/solr/schema.xml -o managed-schema + + # Fix from https://github.com/ckan/ckan/issues/5585#issuecomment-953586246 + sed -i "s/text<\/defaultSearchField>/text<\/df>/" managed-schema + sed -i "s///" managed-schema + # Zip solr configSet - cd solr && zip ckan_2.9_solr_config.zip \ - currency.xml elevate.xml protwords.txt schema.xml solrconfig.xml stopwords.txt synonyms.txt + zip ckan_2.9_solr_config.zip \ + managed-schema solrconfig.xml protwords.txt stopwords.txt synonyms.txt echo "Uploading config set..." curl --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PASSWORD \ @@ -36,5 +43,4 @@ if ! (curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PA curl --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PASSWORD \ "$CKAN_SOLR_BASE_URL/solr/admin/collections?action=create&name=$COLLECTION_NAME&collection.configName=$COLLECTION_NAME&numShards=1" \ -X POST - cd - fi From cd4c1e3df4fa7c3e00a4f839d1ab8d8e01a67249 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 17:14:27 -0500 Subject: [PATCH 11/19] new: local solrcloud with zookeeper docker compose works --- ckan/Dockerfile | 6 ++- ckan/setup/ckan_setup.sh | 4 +- ckan/setup/migrate-solrcloud-schema.sh | 4 ++ docker-compose.yml | 56 +++++++++++++++++++++++++- solr/Dockerfile | 1 + 5 files changed, 67 insertions(+), 4 deletions(-) diff --git a/ckan/Dockerfile b/ckan/Dockerfile index e57bde52..d847d18c 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -5,7 +5,7 @@ FROM openknowledge/ckan-dev:2.9 ENV GIT_BRANCH=2.9 # add dependencies for cryptography and vim -RUN apk add libressl-dev musl-dev libffi-dev xmlsec vim xmlsec-dev openjdk11 +RUN apk add libressl-dev musl-dev libffi-dev xmlsec vim xmlsec-dev openjdk11 zip # Download Saxon jar for FGDC2ISO transform (geodatagov) ARG saxon_ver=9.9.1-7 ADD \ @@ -26,6 +26,10 @@ COPY docker-entrypoint.d/* /docker-entrypoint.d/ COPY setup/gunicorn.conf.py ${APP_DIR}/ COPY setup/server_start.sh ${APP_DIR}/ +# Add ckan solr core files +ADD setup/solr/* ${APP_DIR}/solr/ +ADD setup/migrate-solrcloud-schema.sh ${APP_DIR}/ + # Custom prerun script for Solr 8 COPY setup/GSA_prerun.py ${APP_DIR}/ diff --git a/ckan/setup/ckan_setup.sh b/ckan/setup/ckan_setup.sh index e139c88a..570f5180 100755 --- a/ckan/setup/ckan_setup.sh +++ b/ckan/setup/ckan_setup.sh @@ -50,8 +50,10 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \ "solr_url = $TEST_CKAN_SOLR_URL" \ "ckan.redis.url = $TEST_CKAN_REDIS_URL" +# Add ckan core to solr +/app/ckan/setup/migrate-solrcloud-schema.sh + # Run the prerun script to init CKAN and create the default admin user -# python3 prerun.py python GSA_prerun.py # Run any startup scripts provided by images extending this one diff --git a/ckan/setup/migrate-solrcloud-schema.sh b/ckan/setup/migrate-solrcloud-schema.sh index e74732ab..c3f252a1 100755 --- a/ckan/setup/migrate-solrcloud-schema.sh +++ b/ckan/setup/migrate-solrcloud-schema.sh @@ -23,6 +23,8 @@ if ! (curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PA --data-urlencode action=list \ --data-urlencode wt=json | grep -q $COLLECTION_NAME); then + cd solr + CKAN_BRANCH="dev-v2.9" curl https://raw.githubusercontent.com/ckan/ckan/$CKAN_BRANCH/ckan/config/solr/schema.xml -o managed-schema @@ -43,4 +45,6 @@ if ! (curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PA curl --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PASSWORD \ "$CKAN_SOLR_BASE_URL/solr/admin/collections?action=create&name=$COLLECTION_NAME&collection.configName=$COLLECTION_NAME&numShards=1" \ -X POST + + cd - fi diff --git a/docker-compose.yml b/docker-compose.yml index 836fcc9c..80c66231 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,11 +24,57 @@ services: - pg_data:/var/lib/postgresql/data solr: - image: ghcr.io/gsa/catalog.data.gov.solr:8-curl + # image: ghcr.io/gsa/catalog.data.gov.solr:8-curl + image: ghcr.io/gsa/catalog.data.gov.solr:latest + environment: + - ZK_HOST=zookeeper1:2181,zookeeper2:2182,zookeeper3:2183 ports: - "8983:8983" + depends_on: + - zookeeper1 + - zookeeper2 + - zookeeper3 + volumes: + - solr_data:/var/solr + + zookeeper1: + image: zookeeper:3.7 + restart: always + ports: + - 2181:2181 + hostname: zookeeper1 + environment: + ZOO_MY_ID: 1 + ZOO_SERVERS: server.1=0.0.0.0:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=zookeeper3:2888:3888;2181 + volumes: + - zookeeperdata:/data + - zookeeperdatalog:/datalog + + zookeeper2: + image: zookeeper:3.7 + restart: always + ports: + - 2182:2181 + hostname: zookeeper2 + environment: + ZOO_MY_ID: 2 + ZOO_SERVERS: server.1=zookeeper1:2888:3888;2181 server.2=0.0.0.0:2888:3888;2181 server.3=zookeeper3:2888:3888;2181 + volumes: + - zookeeperdata2:/data + - zookeeperdatalog2:/datalog + + zookeeper3: + image: zookeeper:3.7 + restart: always + ports: + - 2183:2181 + hostname: zookeeper3 + environment: + ZOO_MY_ID: 3 + ZOO_SERVERS: server.1=zookeeper1:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=0.0.0.0:2888:3888;2181 volumes: - - solr_data:/opt/solr/server/solr/ckan/data/index + - zookeeperdata3:/data + - zookeeperdatalog3:/datalog redis: image: redis:alpine @@ -53,3 +99,9 @@ volumes: ckan_storage: pg_data: solr_data: + zookeeperdata: + zookeeperdatalog: + zookeeperdata2: + zookeeperdatalog2: + zookeeperdata3: + zookeeperdatalog3: diff --git a/solr/Dockerfile b/solr/Dockerfile index 5e28028f..5adf0440 100644 --- a/solr/Dockerfile +++ b/solr/Dockerfile @@ -1,6 +1,7 @@ FROM solr:8.11-slim MAINTAINER Open Knowledge +USER root ####################### ## GSA Specific Changes (also need root user) From d5f60d0c2f5cb156eaeb731149ff414298adbde7 Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 17:28:18 -0500 Subject: [PATCH 12/19] update: fix solr core creation for local and cloud.gov execution --- .gitignore | 3 ++- .profile | 2 +- ckan/Dockerfile | 4 ---- ckan/setup/migrate-solrcloud-schema.sh | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 5bd23765..47a82750 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ e2e/cypress/results/output.xml node_modules package-lock.json -solr/*.zip \ No newline at end of file +ckan/setup/solr/*.zip +ckan/setup/solr/managed-schema diff --git a/.profile b/.profile index 8bcb77fd..466239a7 100755 --- a/.profile +++ b/.profile @@ -82,7 +82,7 @@ export CKANEXT__SAML2AUTH__SYSADMINS_LIST=$(echo $VCAP_SERVICES | jq --raw-outpu # Set up the collection in Solr echo Setting up Solr collection export SOLR_COLLECTION=ckan -./solr/migrate-solrcloud-schema.sh $SOLR_COLLECTION +./ckan/setup/migrate-solrcloud-schema.sh $SOLR_COLLECTION export CKAN_SOLR_URL=$CKAN_SOLR_BASE_URL/solr/$SOLR_COLLECTION # Write out any files and directories diff --git a/ckan/Dockerfile b/ckan/Dockerfile index d847d18c..4d030862 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -26,10 +26,6 @@ COPY docker-entrypoint.d/* /docker-entrypoint.d/ COPY setup/gunicorn.conf.py ${APP_DIR}/ COPY setup/server_start.sh ${APP_DIR}/ -# Add ckan solr core files -ADD setup/solr/* ${APP_DIR}/solr/ -ADD setup/migrate-solrcloud-schema.sh ${APP_DIR}/ - # Custom prerun script for Solr 8 COPY setup/GSA_prerun.py ${APP_DIR}/ diff --git a/ckan/setup/migrate-solrcloud-schema.sh b/ckan/setup/migrate-solrcloud-schema.sh index c3f252a1..ff2e38f4 100755 --- a/ckan/setup/migrate-solrcloud-schema.sh +++ b/ckan/setup/migrate-solrcloud-schema.sh @@ -23,7 +23,7 @@ if ! (curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PA --data-urlencode action=list \ --data-urlencode wt=json | grep -q $COLLECTION_NAME); then - cd solr + cd $(dirname $0)/solr CKAN_BRANCH="dev-v2.9" curl https://raw.githubusercontent.com/ckan/ckan/$CKAN_BRANCH/ckan/config/solr/schema.xml -o managed-schema From 3f14eba93c84c48f23430c6cb5d80408fca5f61a Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Tue, 4 Jan 2022 17:33:51 -0500 Subject: [PATCH 13/19] fix: use a less-modified solr 8 with curl image --- docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 80c66231..7cc835be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,8 +24,7 @@ services: - pg_data:/var/lib/postgresql/data solr: - # image: ghcr.io/gsa/catalog.data.gov.solr:8-curl - image: ghcr.io/gsa/catalog.data.gov.solr:latest + image: ghcr.io/gsa/catalog.data.gov.solr:8-curl environment: - ZK_HOST=zookeeper1:2181,zookeeper2:2182,zookeeper3:2183 ports: From 739acd31d74d21ed72d86ba2a53d202d40f226be Mon Sep 17 00:00:00 2001 From: Nicholas Kumia Date: Wed, 5 Jan 2022 11:16:48 -0500 Subject: [PATCH 14/19] test: see ckan startup on github actions --- .github/workflows/commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index ce6e0e2a..c583f25e 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -15,7 +15,7 @@ jobs: - name: build run: make build - name: test - run: make test + run: docker-compose -f docker-compose.yml -f docker-compose.test.yml up ckan - name: cypress-artifacs uses: actions/upload-artifact@v2 if: failure() From e75d3f388b32c627520d243baf72d77dd769bff7 Mon Sep 17 00:00:00 2001 From: jbrown-xentity Date: Wed, 5 Jan 2022 09:49:07 -0700 Subject: [PATCH 15/19] Wait for solr to be complete. --- ckan/setup/ckan_setup.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckan/setup/ckan_setup.sh b/ckan/setup/ckan_setup.sh index 570f5180..b0dcca75 100755 --- a/ckan/setup/ckan_setup.sh +++ b/ckan/setup/ckan_setup.sh @@ -53,9 +53,12 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \ # Add ckan core to solr /app/ckan/setup/migrate-solrcloud-schema.sh +sleep 20 + # Run the prerun script to init CKAN and create the default admin user python GSA_prerun.py +sleep 20 # Run any startup scripts provided by images extending this one if [[ -d "/docker-entrypoint.d" ]] then From fb124df0680722d0834aa81647ba363138999a90 Mon Sep 17 00:00:00 2001 From: jbrown-xentity Date: Wed, 5 Jan 2022 09:51:44 -0700 Subject: [PATCH 16/19] Update make test --- .github/workflows/commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index c583f25e..ce6e0e2a 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -15,7 +15,7 @@ jobs: - name: build run: make build - name: test - run: docker-compose -f docker-compose.yml -f docker-compose.test.yml up ckan + run: make test - name: cypress-artifacs uses: actions/upload-artifact@v2 if: failure() From bd1e0ca1796d85948a0c439b3df635d0d599c388 Mon Sep 17 00:00:00 2001 From: jbrown-xentity Date: Wed, 5 Jan 2022 10:08:22 -0700 Subject: [PATCH 17/19] SOLR fails to come up, review all logs on github --- .github/workflows/commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index ce6e0e2a..f12e0643 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -15,7 +15,7 @@ jobs: - name: build run: make build - name: test - run: make test + run: docker-compose -f docker-compose.yml -f docker-compose.test.yml up - name: cypress-artifacs uses: actions/upload-artifact@v2 if: failure() From a1b5a4308066cceb5324745ff3e28ac6f72ed75b Mon Sep 17 00:00:00 2001 From: jbrown-xentity Date: Wed, 5 Jan 2022 10:32:04 -0700 Subject: [PATCH 18/19] SOLR start takes too long It seems like SOLR is ready to serve traffic 3 seconds after ckan tries to run the setup. Wait 20 seconds to be safe. --- ckan/setup/ckan_setup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckan/setup/ckan_setup.sh b/ckan/setup/ckan_setup.sh index b0dcca75..075e548f 100755 --- a/ckan/setup/ckan_setup.sh +++ b/ckan/setup/ckan_setup.sh @@ -50,6 +50,8 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \ "solr_url = $TEST_CKAN_SOLR_URL" \ "ckan.redis.url = $TEST_CKAN_REDIS_URL" +sleep 20 # SOLR takes a while to boot up in zookeeper mode + # Add ckan core to solr /app/ckan/setup/migrate-solrcloud-schema.sh From 5a0da08287248028830e4dbe1e3860cf2649537a Mon Sep 17 00:00:00 2001 From: jbrown-xentity Date: Wed, 5 Jan 2022 11:41:33 -0700 Subject: [PATCH 19/19] Remove debugging, setup solr wait --- .github/workflows/commit.yml | 2 +- ckan/setup/ckan_setup.sh | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index f12e0643..ce6e0e2a 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -15,7 +15,7 @@ jobs: - name: build run: make build - name: test - run: docker-compose -f docker-compose.yml -f docker-compose.test.yml up + run: make test - name: cypress-artifacs uses: actions/upload-artifact@v2 if: failure() diff --git a/ckan/setup/ckan_setup.sh b/ckan/setup/ckan_setup.sh index 075e548f..ef2d008e 100755 --- a/ckan/setup/ckan_setup.sh +++ b/ckan/setup/ckan_setup.sh @@ -50,17 +50,24 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \ "solr_url = $TEST_CKAN_SOLR_URL" \ "ckan.redis.url = $TEST_CKAN_REDIS_URL" -sleep 20 # SOLR takes a while to boot up in zookeeper mode +# SOLR takes a while to boot up in zookeeper mode, make sure it's up before +echo "Validating SOLR is up..." +NEXT_WAIT_TIME=0 +until [ $NEXT_WAIT_TIME -eq 10 ] || curl --get --fail --quiet --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PASSWORD \ + $CKAN_SOLR_BASE_URL/solr/admin/collections \ + --data-urlencode action=list \ + --data-urlencode wt=json; do + sleep $(( NEXT_WAIT_TIME++ )) + echo "SOLR still not up, trying for the $NEXT_WAIT_TIME time" +done +[ $NEXT_WAIT_TIME -lt 10 ] # Add ckan core to solr /app/ckan/setup/migrate-solrcloud-schema.sh -sleep 20 - # Run the prerun script to init CKAN and create the default admin user python GSA_prerun.py -sleep 20 # Run any startup scripts provided by images extending this one if [[ -d "/docker-entrypoint.d" ]] then