Merge pull request #82 from ontodev/more-examples
Add more penguins examples
jamesaoverton authored Jul 25, 2024
2 parents 4d06536 + 2b31494 commit c5d9d09
Showing 32 changed files with 2,684 additions and 264 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -13,6 +13,8 @@
 /temp/
 /.venv/
 
+/examples/penguins/table/nanobot.toml
+
 # Generated by nanobot
 .nanobot.db*

64 changes: 49 additions & 15 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -48,7 +48,7 @@ rev = "f46fbd5450505644ed9970cef1ae14164699981f"
 [dependencies.ontodev_valve]
 # path = "../ontodev_demo/valve.rs"
 git = "https://github.com/ontodev/valve.rs"
-rev = "b1a0b282bbccf8817901bf45494f25316afb9ac2"
+rev = "1858972dec231f874c87f3fd930cbeef47a444d3"
 
 [dependencies.ontodev_sqlrest]
 git = "https://github.com/ontodev/sqlrest.rs"
47 changes: 33 additions & 14 deletions Makefile
@@ -3,6 +3,7 @@ usage:
 	@echo "make [TASK]"
 	@echo "  format      reformat code"
 	@echo "  build       build release"
+	@echo "  clean       remove build files"
 	@echo "  test        run all tests"
 	@echo "  dev-check   watch for changes and run cargo check"
 	@echo "  dev-test    watch for changes and run tests"
@@ -26,28 +27,46 @@ format:
 build:
 	cargo build --release
 
+clean:
+	rm -rf build/
+
 build/ build/penguins/:
 	mkdir -p $@
 
-target/debug/nanobot: src/
+build/penguins/%/:
+	mkdir -p $@
+
+target/debug/nanobot: Cargo.* src/**
 	cargo build
 
-target/release/nanobot: src/
+target/release/nanobot: Cargo.* src/**
 	cargo build --release
 
 TEST_TABLES = ldtab prefix statement
-TEST_TSVS = $(foreach T,${TEST_TABLES},src/resources/test_data/${T}.tsv)
-src/resources/test_data/zfa_excerpt.db: ${TEST_TSVS}
+TEST_TSVS = $(foreach T,$(TEST_TABLES),src/resources/test_data/$(T).tsv)
+src/resources/test_data/zfa_excerpt.db: $(TEST_TSVS)
 	rm -f $@
 	sqlite3 $@ ".mode tabs" \
-		$(foreach T,${TEST_TABLES},".import src/resources/test_data/${T}.tsv ${T}")
+		$(foreach T,$(TEST_TABLES),".import src/resources/test_data/$(T).tsv $(T)")
 
-.PHONY: test
-test: target/debug/nanobot build/penguins/.nanobot.db
+EXAMPLES := table tables
+EXAMPLE_DBS := $(foreach EXAMPLE,$(EXAMPLES),build/penguins/$(EXAMPLE)/.nanobot.db)
+
+.PHONY: test-examples
+test-examples: $(EXAMPLE_DBS)
+
+.PHONY: test-code
+test-code:
 	cargo fmt --check
 	cargo test
+
+.PHONY: test-docs
+test-docs:
 	PATH="$${PATH}:$$(pwd)/target/debug"; tesh --debug false ./doc
+
+.PHONY: test
+test: test-code test-examples test-docs
 
 .PHONY: dev-check
 dev-check:
 	find src/ tests/ | entr -rs 'cargo check --release'
@@ -60,18 +79,18 @@ dev-test:
 dev-serve:
 	find src/ | entr -rs 'cargo build --release && target/release/nanobot serve'
 
-build/penguins/.nanobot.db: target/debug/nanobot examples/penguins/ | build/penguins/
+build/penguins/%/.nanobot.db: target/debug/nanobot examples/penguins/% | build/penguins/%/
 	rm -rf $|
 	mkdir -p $|
-	cp -r examples/penguins/* $|
+	mkdir -p $|/src/data/
+	cp -r $(word 2,$^) build/penguins/
 	cd $| \
-	&& python3 generate.py \
-	&& ../../$< init
+	&& rm -f .nanobot.db \
+	&& python3 ../../../examples/penguins/generate.py src/data/penguin.tsv \
+	&& ../../../$< init
 
 .PHONY: penguins
-penguins: target/debug/nanobot build/penguins/.nanobot.db
-	cd build/penguins && ../../$< serve
+penguins: target/debug/nanobot build/penguins/tables/.nanobot.db
+	cd $(dir $(word 2,$^)) && ../../../$< serve
 
 build/synthea.zip: | build
 	curl -L -o build/synthea.zip "https://synthetichealth.github.io/synthea-sample-data/downloads/synthea_sample_data_csv_apr2020.zip"
79 changes: 79 additions & 0 deletions examples/README.md
@@ -0,0 +1,79 @@
# Nanobot Examples

Get a `nanobot` binary,
then run any of these examples from that example's directory.

### Binary

1. get a `nanobot` binary, either by
- downloading a [release](https://github.com/ontodev/nanobot.rs/releases)
- using `cargo build` to build `target/debug/nanobot`
2. make sure that the `nanobot` binary is on your
[`PATH`](https://opensource.com/article/17/6/set-path-linux)

Then inside the directory for the specific example,
you have two options for running Nanobot:
temporary and persistent.

### Temporary

1. run `nanobot serve --connection :memory:`
2. open <http://0.0.0.0:3000> in your web browser
3. press `^C` (Control-C) to stop the web server

This will create an "in-memory" SQLite database,
load and validate all the tables,
then start the Nanobot server on your local machine,
so you can work with it in your web browser.
When you stop the Nanobot server (using Control-C),
the in-memory SQLite database will be deleted,
along with all your unsaved changes.
When you run `nanobot serve --connection :memory:` again,
Nanobot will start over with a new in-memory SQLite database.
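This is standard SQLite behavior, not specific to Nanobot: a `:memory:` database lives only as long as the connection that created it. A quick illustration using Python's built-in `sqlite3` module:

```python
import sqlite3

# An in-memory database exists only for the lifetime of its connection.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE penguin (species TEXT)")
conn.execute("INSERT INTO penguin VALUES ('Adelie')")
rows = conn.execute("SELECT species FROM penguin").fetchall()
conn.close()

# A fresh ":memory:" connection starts with an empty database:
# the table created above is gone.
conn = sqlite3.connect(":memory:")
tables = conn.execute(
    "SELECT name FROM sqlite_master WHERE type = 'table'"
).fetchall()
conn.close()
```

Nanobot's `--connection :memory:` option behaves the same way: each run starts from an empty database and reloads the TSV files.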

If you want to keep a SQLite database file
to reuse, view, or modify,
then use the "persistent" approach to running Nanobot.

### Persistent

1. run `nanobot init` to load and validate the tables,
creating the `nanobot.toml` configuration file
(if it does not exist)
and the `.nanobot.db` SQLite database file
2. run `nanobot serve` to start the web server
3. open <http://0.0.0.0:3000> in your web browser
4. press `^C` (Control-C) to stop the web server
5. delete the `.nanobot.db` file when you are done with it

The persistent approach will create a SQLite database file
that you can work with
while the Nanobot server is running,
or after it has stopped.
If you stop the Nanobot server
and then start it again with `nanobot serve`,
Nanobot will reuse this SQLite database file --
it will not create a new database or reload the TSV files.
To start fresh,
delete the `.nanobot.db` file
and run `nanobot init` again to recreate it.

You can view and modify the `.nanobot.db` SQLite database file
using the `sqlite3` command-line tool,
other command-line tools like [VisiData](https://www.visidata.org),
or GUI applications like [DB Browser for SQLite](https://sqlitebrowser.org).
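You can also script this kind of inspection with Python's built-in `sqlite3` module. A minimal sketch that lists the tables in a database file (the path and table names depend on your project):

```python
import sqlite3

def list_tables(db_path):
    """Return the names of all user-defined tables in a SQLite database."""
    with sqlite3.connect(db_path) as conn:
        cur = conn.execute(
            "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name"
        )
        return [name for (name,) in cur.fetchall()]

# For example, after running `nanobot init` in an example directory:
# print(list_tables(".nanobot.db"))
```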

## Troubleshooting

If you're running into errors,
see if the debugging messages help.
You will want a `nanobot.toml` configuration file.
If it does not exist in the directory,
running `nanobot init` will create it.
You can configure more verbose logging
by adding this to the `nanobot.toml` file:

```toml
[logging]
level = "DEBUG"
```
49 changes: 49 additions & 0 deletions examples/penguins/README.md
@@ -0,0 +1,49 @@
# Nanobot Examples: Penguins

This directory contains a series of Nanobot examples,
based on the
[Palmer Penguins](https://allisonhorst.github.io/palmerpenguins/)
data collected and made available by
[Dr. Kristen Gorman](https://www.uaf.edu/cfos/people/faculty/detail/kristen-gorman.php)
and the
[Palmer Station, Antarctica LTER](https://pallter.marine.rutgers.edu/),
a member of the
[Long Term Ecological Research Network](https://lternet.edu/).

See the [README](../README.md) in the parent directory for more information
about how to install and run Nanobot for these examples.

## Examples

The simplest "[table](table/)" example
demonstrates most of Nanobot's features.
The following examples show additional functionality
and increasingly powerful workflows.

1. [table](table/)
2. tables

## Example Data

The `generate.py` script generates "synthetic" (i.e. random) data
with columns and ranges of values similar to Palmer Penguins,
with as many rows as we want
and a specified rate of randomly generated errors.
(Note that the probability distribution of the random values
is not the same as in the real Palmer Penguins data.)
This lets us test Nanobot on small or large tables of realistic data,
with whatever error rate we choose.

Each example directory includes a `src/data/penguin.tsv` table
with 1000 rows and a 10% error rate.

You can test variations of the `penguin.tsv` table
by using `generate.py` to generate more random rows
with a specified error rate.
Run `python3 generate.py --help` for more information.
For example, to test the "[table](table/)" example
using a million rows with a 1% error rate,
run `python3 generate.py table/src/data/penguin.tsv 1000000 1`.
You can restore the original table
by running `git checkout table/src/data/penguin.tsv`.
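The core of such a generator can be sketched as follows. This is an illustrative approximation, not the actual `generate.py`: the column names, value ranges, and error-injection strategy here are all assumptions.

```python
import csv
import io
import random

SPECIES = ["Adelie", "Chinstrap", "Gentoo"]  # assumed value set

def generate_rows(count, error_rate, seed=0):
    """Generate `count` synthetic penguin rows; with probability
    `error_rate`, corrupt one field so that validation will flag it."""
    rng = random.Random(seed)
    rows = []
    for i in range(1, count + 1):
        row = {
            "individual_id": f"N{i}",
            "species": rng.choice(SPECIES),
            "bill_length_mm": str(round(rng.uniform(32.0, 60.0), 1)),
            "body_mass_g": str(rng.randrange(2700, 6300, 25)),
        }
        if rng.random() < error_rate:
            row["bill_length_mm"] = "not a number"  # deliberate error
        rows.append(row)
    return rows

def to_tsv(rows):
    """Serialize rows as a TSV string with a header line."""
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=rows[0].keys(), delimiter="\t")
    writer.writeheader()
    writer.writerows(rows)
    return buf.getvalue()

# 1000 rows with a 10% error rate, as in the checked-in penguin.tsv.
rows = generate_rows(1000, 0.10)
tsv = to_tsv(rows)
```

Writing the resulting `tsv` string to `src/data/penguin.tsv` and running `nanobot init` would then load and validate it.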