Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use standalone Solr on ECS #460

Merged
merged 5 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test
TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_test

# Other services connections
COLLECTION_NAME=ckan_local
CKAN_SOLR_URL=http://solr:8983/solr/${COLLECTION_NAME}
CKAN_REDIS_URL=redis://redis:6379/1

CKAN_SOLR_BASE_URL=http://solr:8983
CKAN_SOLR_USER=admin
CKAN_SOLR_PASSWORD=pass
CKAN_SOLR_USER=catalog
CKAN_SOLR_PASSWORD='Bleeding-Edge'
COLLECTION_NAME=ckan

CKAN_SOLR_URL="http://solr:8983/solr/${COLLECTION_NAME}"
CKAN_REDIS_URL=redis://redis:6379/1

TEST_CKAN_SOLR_URL=http://solr:8983/solr/${COLLECTION_NAME}
TEST_CKAN_SOLR_URL="http://${CKAN_SOLR_USER}:${CKAN_SOLR_PASSWORD}@solr:8983/solr/${COLLECTION_NAME}"
TEST_CKAN_REDIS_URL=redis://redis:6379/1

# Core settings
Expand Down
2 changes: 1 addition & 1 deletion .profile
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ export CKANEXT__SAML2AUTH__SYSADMINS_LIST=$(echo $VCAP_SERVICES | jq --raw-outpu
# Set up the collection in Solr
echo Setting up Solr collection
export SOLR_COLLECTION=ckan
./ckan/setup/migrate-solrcloud-schema.sh $SOLR_COLLECTION
# ./ckan/setup/migrate-solrcloud-schema.sh $SOLR_COLLECTION
export CKAN_SOLR_URL=$CKAN_SOLR_BASE_URL/solr/$SOLR_COLLECTION

# Explicitly don't proxy solr,
Expand Down
11 changes: 8 additions & 3 deletions ckan/setup/GSA_prerun.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import sys
import time
import requests
try:
from urllib.request import urlopen
from urllib.error import URLError
Expand All @@ -20,19 +21,22 @@ def check_solr_connection(retry=None):
print("[prerun] Giving up after 5 tries...")
sys.exit(1)

url = os.environ.get("CKAN_SOLR_URL", "")
CKAN_SOLR_USER = os.environ.get("CKAN_SOLR_USER", "")
CKAN_SOLR_PASSWORD = os.environ.get("CKAN_SOLR_PASSWORD", "")
url = os.environ.get("CKAN_SOLR_URL", "").replace('http://', f'http://{CKAN_SOLR_USER}:{CKAN_SOLR_PASSWORD}@')
search_url = "{url}/select/?q=*&wt=json".format(url=url)

try:
connection = urlopen(search_url)
# Using requests to add username and password to URL
connection = requests.request("GET", search_url)
except URLError as e:
print(str(e))
print("[prerun] Unable to connect to solr, waiting...")
time.sleep(10)
check_solr_connection(retry=retry - 1)
else:
try:
pythonified = str(connection.read()).replace('true', 'True')
pythonified = str(connection.text).replace('true', 'True')
eval(pythonified)
except TypeError:
pass
Expand All @@ -50,5 +54,6 @@ def check_solr_connection(retry=None):
pr.update_plugins()
pr.check_datastore_db_connection()
pr.init_datastore_db()
# This function does not work, but solr is up
check_solr_connection()
pr.create_sysadmin()
17 changes: 2 additions & 15 deletions ckan/setup/ckan_setup.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -e

# Comes from https://github.com/okfn/docker-ckan/blob/master/ckan-dev/2.9/setup/start_ckan_development.sh
# This replaces running commands as user ckan and
# allows the user to run any command they want after ckan is setup
Expand Down Expand Up @@ -50,21 +52,6 @@ ckan config-tool $SRC_DIR/ckan/test-core.ini \
"solr_url = $TEST_CKAN_SOLR_URL" \
"ckan.redis.url = $TEST_CKAN_REDIS_URL"

# SOLR takes a while to boot up in zookeeper mode; make sure it's up before continuing
echo "Validating SOLR is up..."
NEXT_WAIT_TIME=0
until [ $NEXT_WAIT_TIME -eq 20 ] || curl --get --fail --location-trusted --user $CKAN_SOLR_USER:$CKAN_SOLR_PASSWORD \
$CKAN_SOLR_BASE_URL/solr/admin/collections \
--data-urlencode action=list \
--data-urlencode wt=json; do
sleep $(( NEXT_WAIT_TIME++ ))
echo "SOLR still not up, trying for the $NEXT_WAIT_TIME time"
done
[ $NEXT_WAIT_TIME -lt 20 ]

# Add ckan core to solr
/app/ckan/setup/migrate-solrcloud-schema.sh $COLLECTION_NAME

# Run the prerun script to init CKAN and create the default admin user
python GSA_prerun.py

Expand Down
64 changes: 8 additions & 56 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,58 +24,15 @@ services:
- pg_data:/var/lib/postgresql/data

solr:
image: ghcr.io/gsa/catalog.data.gov.solr:8-curl
environment:
- ZK_HOST=zookeeper1:2181,zookeeper2:2182,zookeeper3:2183
# ports:
# - "8983:8983"
deploy:
replicas: 3
depends_on:
- zookeeper1
- zookeeper2
- zookeeper3
# volumes:
# - solr_data:/var/solr

zookeeper1:
image: zookeeper:3.7
restart: always
ports:
- 2181:2181
hostname: zookeeper1
environment:
ZOO_MY_ID: 1
ZOO_SERVERS: server.1=0.0.0.0:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=zookeeper3:2888:3888;2181
volumes:
- zookeeperdata:/data
- zookeeperdatalog:/datalog

zookeeper2:
image: zookeeper:3.7
restart: always
image: ghcr.io/gsa/catalog.data.gov.solr:8-stunnel-root
command: /app/solr/local_setup.sh
ports:
- 2182:2181
hostname: zookeeper2
environment:
ZOO_MY_ID: 2
ZOO_SERVERS: server.1=zookeeper1:2888:3888;2181 server.2=0.0.0.0:2888:3888;2181 server.3=zookeeper3:2888:3888;2181
volumes:
- zookeeperdata2:/data
- zookeeperdatalog2:/datalog

zookeeper3:
image: zookeeper:3.7
restart: always
ports:
- 2183:2181
hostname: zookeeper3
environment:
ZOO_MY_ID: 3
ZOO_SERVERS: server.1=zookeeper1:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=0.0.0.0:2888:3888;2181
- "8983:8983"
deploy:
replicas: 1
volumes:
- zookeeperdata3:/data
- zookeeperdatalog3:/datalog
- solr_data:/var/solr
- .:/app

redis:
image: redis:alpine
Expand All @@ -99,9 +56,4 @@ services:
volumes:
ckan_storage:
pg_data:
zookeeperdata:
zookeeperdatalog:
zookeeperdata2:
zookeeperdatalog2:
zookeeperdata3:
zookeeperdatalog3:
solr_data:
20 changes: 16 additions & 4 deletions solr/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ USER root
#######################

# Add curl to access API routes
RUN apt-get update && apt-get install -y curl
RUN apt-get update && apt-get install -y curl vim stunnel4 git

# Install EFS Dependencies
RUN git clone https://github.com/aws/efs-utils && \
cd efs-utils && \
apt-get -y install binutils && \
./build-deb.sh && \
apt-get -y install ./build/amazon-efs-utils*deb

# Install hostname resolution dependencies
RUN apt-get install -y dnsutils

# Fix Issue https://github.com/GSA/datagov-deploy/issues/3285
# Update the angularjs library files
Expand Down Expand Up @@ -64,8 +74,10 @@ RUN perl -0777 -i -pe 's/ \
# Giving ownership to user 'solr'
RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data/index
RUN chown -R $SOLR_USER:$SOLR_USER /opt/solr/server/solr/
#
# Set user to 'solr' to comply with solr security
USER solr

# Leave user as 'root' since we need to mount EFS volume which requires root
# The user will be set back to 'solr' before solr is started
# Starting Solr can be done with something like...
# su -c "init-var-solr; precreate-core ckan /tmp/ckan_config; chown -R 8983:8983 /var/solr/data; solr-fg -m <ram>" -m solr

# CMD ["sh", "-c", "solr-precreate ckan $SOLR_CONFIG_DIR/ckan"]
7 changes: 7 additions & 0 deletions solr/local_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Container entry point for the local (docker-compose) Solr service:
# prepares the ckan core configuration and then starts Solr in the
# foreground as the unprivileged 'solr' user.

# Setup ckan core config.
# Stop here if the setup script reports a failure: starting Solr anyway would
# bring up a node whose auth/core configuration was not put in place.
if ! /app/solr/solr_setup.sh; then
  echo "solr_setup.sh failed; not starting Solr" >&2
  exit 1
fi

# Start solr.
# The container runs as root (needed for the setup step above), so drop to
# the 'solr' user for everything that touches the Solr runtime. '-m' keeps
# the current environment for the child shell.
su -c "init-var-solr; precreate-core ckan /tmp/ckan_config; chown -R 8983:8983 /var/solr/data; solr-fg" -m solr
14 changes: 14 additions & 0 deletions solr/security.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"authentication":{
"blockUnknown": true,
"class":"solr.BasicAuthPlugin",
"credentials":{"catalog":"rJzrn+HooKn79Q+cfysdGKmMhJbtj0Q1bTokFud6f9o= eKuBUjAoBIkJAMYZxJU6HOKSchTAce+DoQrY5Vewu7I="},
"realm":"data.gov users",
"forwardCredentials": false
},
"authorization":{
"class":"solr.RuleBasedAuthorizationPlugin",
"permissions":[{"name":"security-edit", "role":"admin"}],
"user-role":{"catalog":"admin"}
}
}
9 changes: 3 additions & 6 deletions solr/service-config.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
{
"solrImageRepo": "ghcr.io/gsa/catalog.data.gov.solr",
"solrImageTag": "8-curl",
"replicas": 3,
"solrJavaMem": "-Xms30g -Xmx32g",
"solrMem": "34G",
"solrCpu": "17000m",
"restartCron": "0 0 1 1 5"
"solrImageTag": "8-stunnel-root",
"solrMem": 12288,
"solrCpu": 3072
}
31 changes: 31 additions & 0 deletions solr/solr_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
#
# Prepare a standalone Solr node for CKAN. Run before Solr is started:
#   1. write the basic-auth security.json into the Solr data directory,
#   2. download the CKAN 2.9 core config files into /tmp/ckan_config,
#   3. hand ownership of the Solr data directory to solr:solr (8983:8983).
#
# Expected to run with enough privileges to chown /var/solr/data.

# Abort on the first failing step, on unset variables and on pipeline errors,
# so a half-configured node is reported instead of silently started.
set -euo pipefail

readonly CONFIG_DIR=/tmp/ckan_config
readonly SOLR_DATA_DIR=/var/solr/data
readonly CONFIG_BASE_URL=https://raw.githubusercontent.com/GSA/catalog.data.gov/main/ckan/setup/solr

# The data directory may not exist yet when this runs ahead of Solr's own
# initialisation, so create it together with the config staging directory.
mkdir -p "$CONFIG_DIR" "$SOLR_DATA_DIR"

# add solr authentication
# The delimiter is quoted so the JSON is written literally, with no shell
# expansion of its contents.
cat <<'SOLRAUTH' > "$SOLR_DATA_DIR/security.json"
{
"authentication":{
"blockUnknown": true,
"class":"solr.BasicAuthPlugin",
"credentials":{"catalog":"rJzrn+HooKn79Q+cfysdGKmMhJbtj0Q1bTokFud6f9o= eKuBUjAoBIkJAMYZxJU6HOKSchTAce+DoQrY5Vewu7I="},
"realm":"data.gov users",
"forwardCredentials": false
},
"authorization":{
"class":"solr.RuleBasedAuthorizationPlugin",
"permissions":[{"name":"security-edit",
"role":"admin"}],
"user-role":{"catalog":"admin"}
}}
SOLRAUTH

#######################################
# Download one CKAN Solr config file into the staging directory.
# Globals:   CONFIG_BASE_URL (read), CONFIG_DIR (read)
# Arguments: $1 - file name in the upstream repository
#            $2 - file name to store it under in CONFIG_DIR
# Returns:   exits the script with status 1 if the download fails
#######################################
fetch_config() {
  local remote_name=$1
  local local_name=$2

  # 'wget -O' creates the target even when the download fails, so the
  # failure has to be caught here rather than inferred from a missing file.
  if ! wget -O "${CONFIG_DIR}/${local_name}" "${CONFIG_BASE_URL}/${remote_name}"; then
    printf 'Failed to download %s\n' "${CONFIG_BASE_URL}/${remote_name}" >&2
    exit 1
  fi
}

# add solr config files for ckan 2.9
# Upstream 'managed-schema' is stored locally as schema.xml; the other files
# keep their upstream names.
fetch_config managed-schema schema.xml
fetch_config protwords.txt protwords.txt
fetch_config solrconfig.xml solrconfig.xml
fetch_config stopwords.txt stopwords.txt
fetch_config synonyms.txt synonyms.txt

# group user solr:solr is 8983:8983 in solr docker image
chown -R 8983:8983 "${SOLR_DATA_DIR}/"