- Notifications
You must be signed in to change notification settings - Fork 271
Replacing DataPusher with XLoader
Here we describe the changes needed to this (ckan-docker) repository to use the XLoader CKAN extension rather than DataPusher. Like DataPusher, XLoader automatically downloads any tabular data files (such as CSV or Excel) from resources when they are added to the CKAN site, parses them to extract the actual data, and then uses the DataStore API to push the data into CKAN's DataStore database.
There is one caveat though: The NGINX reverse proxy container has been taken out of the configuration.
XLoader is installed into the same container as CKAN core
At the time of writing (July 2023) the version of CKAN used is 2.10.1 and xloader 1.0.1
There are 3 file changes required:
docker-compose.yml, .env and Dockerfile
An xloader/ directory containing the Dockerfile described below is also needed.
- docker-compose.yml
# Compose stack for CKAN with XLoader: the CKAN/XLoader container plus its
# PostgreSQL, Solr and Redis backends. All ${...} values come from .env.
version: "3"

volumes:
  ckan_storage:
  pg_data:
  solr_data:

services:

  # CKAN core with the XLoader extension, built from xloader/Dockerfile
  ckan-xloader:
    container_name: ${XLOADER_CONTAINER_NAME}
    build:
      context: xloader/
      dockerfile: Dockerfile
      args:
        - TZ=${TZ}
    networks:
      - xloadernet
      - dbnet
      - solrnet
      - redisnet
    env_file:
      - .env
    # Start only after every backend reports healthy
    depends_on:
      db:
        condition: service_healthy
      solr:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ckan_storage:/var/lib/ckan
    ports:
      - "0.0.0.0:${XLOADER_PORT_HOST}:${XLOADER_PORT}"
    restart: unless-stopped
    healthcheck:
      # NOTE(review): host and port are hard-coded here; they match the
      # XLOADER_CONTAINER_NAME / XLOADER_PORT values in .env - keep in sync.
      test: ["CMD", "wget", "-qO", "/dev/null", "http://xloader:5000"]

  db:
    container_name: ${POSTGRESQL_CONTAINER_NAME}
    build:
      context: postgresql/
    networks:
      - dbnet
    # Names only: the values are passed through from the environment / .env
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_DB
      - PGDATA
      - CKAN_DB_USER
      - CKAN_DB_PASSWORD
      - CKAN_DB
      - DATASTORE_READONLY_USER
      - DATASTORE_READONLY_PASSWORD
      - DATASTORE_DB
    volumes:
      - pg_data:/var/lib/postgresql/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER}", "-d", "${POSTGRES_DB}"]

  solr:
    container_name: ${SOLR_CONTAINER_NAME}
    networks:
      - solrnet
    image: ckan/ckan-solr:${SOLR_IMAGE_VERSION}
    volumes:
      - solr_data:/var/solr
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:8983/solr/"]

  redis:
    container_name: ${REDIS_CONTAINER_NAME}
    image: redis:${REDIS_VERSION}
    networks:
      - redisnet
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "-e", "QUIT"]

# Only xloadernet is externally reachable; the backend networks are internal
networks:
  xloadernet:
  solrnet:
    internal: true
  dbnet:
    internal: true
  redisnet:
    internal: true
.env
# Container names
NGINX_CONTAINER_NAME=nginx
REDIS_CONTAINER_NAME=redis
POSTGRESQL_CONTAINER_NAME=db
SOLR_CONTAINER_NAME=solr
DATAPUSHER_CONTAINER_NAME=datapusher
CKAN_CONTAINER_NAME=ckan
XLOADER_CONTAINER_NAME=xloader

# Host Ports
CKAN_PORT_HOST=5000
NGINX_PORT_HOST=81
NGINX_SSLPORT_HOST=8443

# CKAN databases
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=postgres
POSTGRES_HOST=db
PGDATA=/var/lib/postgresql/data/db
CKAN_DB_USER=ckandbuser
CKAN_DB_PASSWORD=ckandbpassword
CKAN_DB=ckandb
DATASTORE_READONLY_USER=datastore_ro
DATASTORE_READONLY_PASSWORD=datastore
DATASTORE_DB=datastore
CKAN_SQLALCHEMY_URL=postgresql://ckandbuser:ckandbpassword@db/ckandb
CKAN_DATASTORE_WRITE_URL=postgresql://ckandbuser:ckandbpassword@db/datastore
CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore

# Test database connections
# The scheme must be "postgresql://": SQLAlchemy 1.4 rejects "postgres://"
TEST_CKAN_SQLALCHEMY_URL=postgresql://ckan:ckan@db/ckan_test
TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test
TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_test

# CKAN core
CKAN_VERSION=2.10.0
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000
CKAN___BEAKER__SESSION__SECRET=CHANGE_ME
# See https://docs.ckan.org/en/latest/maintaining/configuration.html#api-token-settings
CKAN___API_TOKEN__JWT__ENCODE__SECRET=string:CHANGE_ME
CKAN___API_TOKEN__JWT__DECODE__SECRET=string:CHANGE_ME
CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
CKAN_SMTP_PASSWORD=pass
CKAN_SMTP_MAIL_FROM=ckan@localhost
TZ=UTC

# Solr
SOLR_IMAGE_VERSION=2.9-solr8
CKAN_SOLR_URL=http://solr:8983/solr/ckan
TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan

# Redis
REDIS_VERSION=6
CKAN_REDIS_URL=redis://redis:6379/1
TEST_CKAN_REDIS_URL=redis://redis:6379/1

# Datapusher
DATAPUSHER_VERSION=0.0.20
CKAN_DATAPUSHER_URL=http://xloader:5000
CKAN__DATAPUSHER__CALLBACK_URL_BASE=http://xloader:5000

# Xloader
XLOADER_VERSION=1.0.1
XLOADER_PORT=5000
XLOADER_PORT_HOST=5000

# NGINX
NGINX_PORT=80
NGINX_SSLPORT=443

# Extensions
CKAN__PLUGINS="envvars image_view text_view recline_view datastore datapusher"
XLOADER__PLUGINS="envvars image_view text_view recline_view datastore xloader"
CKAN__HARVEST__MQ__TYPE=redis
CKAN__HARVEST__MQ__HOSTNAME=redis
CKAN__HARVEST__MQ__PORT=6379
CKAN__HARVEST__MQ__REDIS_DB=1
Dockerfile
# CKAN + XLoader image; only the container timezone is customised on top of
# the base image.
FROM ckan/ckan-base-xloader:1.0.1

# Set up environment variables
ENV APP_DIR=/srv/app

# Accept the TZ build argument that docker-compose.yml passes (build.args).
# Without this ARG the value is silently ignored and the image is always UTC.
# Falls back to UTC when the argument is unset or empty.
ARG TZ=UTC
ENV TZ=${TZ:-UTC}

RUN echo ${TZ} > /etc/timezone

# Copy the zoneinfo file into place unless /etc/localtime already is that
# same file (cp would fail on identical source and destination)
RUN if ! [ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then \
        cp /usr/share/zoneinfo/${TZ} /etc/localtime ;\
    fi ;

EXPOSE 5000