CARRIER-project · svenvanderburg · May 25, 2020 · May 25, 2020 · May 26, 2020 · May 26, 2020
diff --git a/README.MD b/README.MD
@@ -3,19 +3,22 @@
 ## Running the local setup
 Simply run:
 ```shell script
-docker-compose up -d
+docker-compose up --force-recreate
+
+# Populate 'escience' node rdf store with sample data
+./nodes/escience/populate-rdf-store.sh
 ```
 
 # Executing a task
-When your docker setup is running you can execute a task using `run_task.py`. Replace the value in `IMAGE` with your 
+When your docker setup is running you can execute a task using `run_task.py`. Replace the value in `IMAGE` with your
 desired docker image
 
 # Docker registry
 For development purposes you might want to run your own local docker registry:
 ```
 # Run docker registry separately. Node tasks are run in a network separate from the node itself. By making the registry
 # available from the host network it will be accessible by all docker containers
-docker run -d  --name registry --network host registry:2 
+docker run -d  --name registry --network host registry:2
 ```
 
 The `run_task.py` script runs a custom algorithm [available on github](https://github.com/CARRIER-project/vantage6-algorithms)
@@ -24,3 +27,8 @@ Make sure this task is published to the local registry by running `push_to_local
 ```
 python run_task.py
 ```
+
+To run the sample_sparqle_query method:
+```
+python run_task.py --rpc --method 'sample_sparqle_query'
+```
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,4 +1,4 @@
-version: "3.4"
+version: "3.5"
 services:
   db:
     image: postgres
@@ -23,24 +23,35 @@ services:
     ports:
       - "5001:5000"
 
+  rdfstore:
+    image: "jvsoest/graphdb-free:fairstation"
+    ports:
+      - "7200:7200"
+    networks:
+      - vantage6-escience-user-net
+
   escience:
+    # We are not specifying a network, but the node container will automatically
+    # connect to vantage6-escience-user-net, thereby the node and algorithm
+    # containers have access to the rdf store on this network.
     container_name: vantage6-escience-user
     image: "harbor.vantage6.ai/infrastructure/node:latest"
     command: "vnode-local start -c /mnt/config/escience.yml --dockerized"
     volumes:
       - ./nodes/escience/escience-node-config.yml:/mnt/config/escience.yml
       - /var/run/docker.sock:/var/run/docker.sock
-      - ./nodes/escience/boston-sample-data.ttl:/mnt/database/database.ttl
       - escience-shared-volume:/mnt/data
     environment:
       - DATA_VOLUME_NAME=escience-shared-volume
-      - DATABASE_URI=/mnt/database/database.ttl
-
+      - DATABASE_URI=http://rdfstore:7200/repositories/repo1
 
 volumes:
   escience-shared-volume:
     name: escience-shared-volume
 
+networks:
+  vantage6-escience-user-net:
+    name: vantage6-escience-user-net
 #
 #  registry:
 #    image: registry:2

diff --git a/nodes/escience/populate-rdf-store.sh b/nodes/escience/populate-rdf-store.sh
@@ -0,0 +1,9 @@
+echo "Add repository to local graphdb instance"
+curl -X POST\
+    http://localhost:7200/rest/repositories\
+    -H 'Content-Type: multipart/form-data'\
+    -F "config=@nodes/escience/repo-config.ttl"
+
+echo "Load sample transaction data into repo1"
+curl -X POST -H "Content-Type:application/x-turtle" -T "nodes/escience/sample-transaction-data.ttl" \
+  http://localhost:7200/repositories/repo1/statements
diff --git a/nodes/escience/repo-config.ttl b/nodes/escience/repo-config.ttl
@@ -0,0 +1,56 @@
+#
+# Sesame configuration template for a GraphDB Free repository
+#
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
+@prefix rep: <http://www.openrdf.org/config/repository#>.
+@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
+@prefix sail: <http://www.openrdf.org/config/sail#>.
+@prefix owlim: <http://www.ontotext.com/trree/owlim#>.
+
+[] a rep:Repository ;
+    rep:repositoryID "repo1" ;
+    rdfs:label "my repository number one" ;
+    rep:repositoryImpl [
+        rep:repositoryType "graphdb:FreeSailRepository" ;
+        sr:sailImpl [
+            sail:sailType "graphdb:FreeSail" ;
+
+            owlim:owlim-license "" ;
+
+            owlim:base-URL "http://example.org/graphdb#" ;
+            owlim:defaultNS "" ;
+            owlim:entity-index-size "200000" ;
+            owlim:entity-id-size  "32" ;
+            owlim:imports "" ;
+            owlim:repository-type "file-repository" ;
+            owlim:ruleset "owl-horst-optimized" ;
+            owlim:storage-folder "storage" ;
+
+            owlim:enable-context-index "false" ;
+            owlim:cache-memory "80m" ;
+            owlim:tuple-index-memory "80m" ;
+
+            owlim:enablePredicateList "false" ;
+            owlim:predicate-memory "0%" ;
+
+            owlim:fts-memory "0%" ;
+            owlim:ftsIndexPolicy "never" ;
+            owlim:ftsLiteralsOnly "true" ;
+
+            owlim:in-memory-literal-properties "false" ;
+            owlim:enable-literal-index "true" ;
+            owlim:index-compression-ratio "-1" ;
+
+            owlim:check-for-inconsistencies "false" ;
+            owlim:disable-sameAs  "false" ;
+            owlim:enable-optimization  "true" ;
+            owlim:transaction-mode "safe" ;
+            owlim:transaction-isolation "true" ;
+            owlim:query-timeout  "0" ;
+            owlim:query-limit-results  "0" ;
+            owlim:throw-QueryEvaluationException-on-timeout "false" ;
+            owlim:useShutdownHooks  "true" ;
+            owlim:read-only "false" ;
+            owlim:nonInterpretablePredicates "http://www.w3.org/2000/01/rdf-schema#label;http://www.w3.org/1999/02/22-rdf-syntax-ns#type;http://www.ontotext.com/owlim/ces#gazetteerConfig;http://www.ontotext.com/owlim/ces#metadataConfig" ;
+        ]
+    ].
diff --git a/nodes/escience/boston-sample-data.ttl → nodes/escience/sample-transaction-data.ttl b/nodes/escience/boston-sample-data.ttl → nodes/escience/sample-transaction-data.ttl
diff --git a/run_task.py b/run_task.py
@@ -3,46 +3,86 @@
 import time
 from typing import List
 
+import click
 import pandas as pd
 import vantage6.client as vtgclient
 
-USERNAME = 'admin'
-PASSWORD = 'admin'
-
-POST = 'POST'
-WAIT_TIME = 1
-RETRIES = 20
-
-HOST = 'http://localhost'
-PORT = 5001
-
-IMAGE = 'localhost:5000/v6-carrier-py'
-METHOD = 'get_printable_graph'
-COLLABORATION_ID = 1
-ORGANIZATION_IDS = [1]  # [2, 3, 6]
-MASTER = False
-NUM_NODES = 1
-
-
-def main():
-    client = vtgclient.Client(HOST, PORT)
-    client.authenticate(USERNAME, PASSWORD)
+DEFAULT_USERNAME = 'admin'
+DEFAULT_PASSWORD = 'admin'
+
+DEFAULT_WAIT_TIME = 1
+DEFAULT_NUM_TRIES = 20
+
+DEFAULT_HOST = 'http://localhost'
+DEFAULT_PORT = 5001
+
+DEFAULT_IMAGE = 'localhost:5000/v6-carrier-py'
+DEFAULT_METHOD = 'correlation_matrix'
+DEFAULT_COLLABORATION_ID = '1'
+DEFAULT_ORGANIZATION_IDS = '1'
+
+
+@click.command(context_settings={'ignore_unknown_options': True,
+                                 'allow_extra_args': True},
+               help='Run a task on vantage6 nodes. Optionally pass extra'
+                    'keyword arguments to be passed to the algorithm'
+                    '(i.e. "--key value")')
+@click.option('--method', default=DEFAULT_METHOD,
+              help='Method to run')
+@click.option('--image', default=DEFAULT_IMAGE,
+              help='Docker image to run')
+@click.option('--collaboration_id', default=DEFAULT_COLLABORATION_ID,
+              help='Identifier for collaboration to run task with', type=int)
+@click.option('--organization_ids', default=DEFAULT_ORGANIZATION_IDS,
+              help='Identifier for organization ids that should run task, '
+                   'pass as comma-separated string ("1,2")')
+@click.option('--username', default=DEFAULT_USERNAME)
+@click.option('--password', default=DEFAULT_PASSWORD)
+@click.option('--host', default=DEFAULT_HOST,
+              help='Host for vantage6 server')
+@click.option('--port', default=DEFAULT_PORT, type=int,
+              help='Port of vantage6 server')
+@click.option('--wait_time', default=DEFAULT_WAIT_TIME, type=int,
+              help='Time in seconds to wait in between polling tries')
+@click.option('--num_tries', default=DEFAULT_NUM_TRIES, type=int,
+              help='Number of tries for polling task results')
+# Will be true if called with --master, false if called with --rpc
+@click.option('--master/--rpc', default=True,
+              help='--master will run a master algorithm'
+                   '--rpc will run a RPC algorithm')
+@click.pass_context
+def main(context, method: str, image: str, collaboration_id: int,
+         organization_ids: str, master: bool, username: str, password: str,
+         host: str, port: int, wait_time: int, num_tries: int):
+    # context.args collects unknown arguments in a list:
+    # (['--unknown_var', 'value3', '--unknown_var2', 'value4'])
+    kwargs = {context.args[i][2:]: context.args[i + 1]
+              for i in range(0, len(context.args), 2)}
+
+    organization_ids = [int(organization_id) for organization_id in organization_ids.split(',')]
+
+    client = vtgclient.Client(host, port)
+    client.authenticate(username, password)
     client.setup_encryption(None)
 
-    task = client.post_task(name=METHOD, image=IMAGE, collaboration_id=COLLABORATION_ID,
-                            organization_ids=ORGANIZATION_IDS,
-                            input_={'method': METHOD, 'master': MASTER, 'kwargs': {'exclude_orgs': ORGANIZATION_IDS}})
+    task = client.post_task(name=method,
+                            image=image,
+                            collaboration_id=collaboration_id,
+                            organization_ids=organization_ids,
+                            input_={'method': method,
+                                    'master': master,
+                                    'kwargs': kwargs})
 
     print(task)
     results = []
 
-    for i in range(RETRIES):
-        print(f'Number of tries {i}')
-        time.sleep(WAIT_TIME)
+    for i in range(num_tries):
+        print(f'Number of tries: {i}')
+        time.sleep(wait_time)
         try:
             results = client.get_results(task_id=task['id'])
             print(results)
-            if ((len(results) > 0) or MASTER) and all(map(lambda x: x['finished_at'], results)):
+            if ((len(results) > 0) or master) and all(map(lambda x: x['finished_at'], results)):
                 print('\nReceived result:')
                 print_result(results)
                 break