CHANGELOG.md (+3 -3)
@@ -94,7 +94,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Simplified flow statuses within Flow System (no more Queued or Scheduled status)
 - Extended flow start conditions with more debug information for UI needs
 - Simplified flow cancellation API:
-  - Cancelling in Waiting/Running states is accepted, and aborts the flow and it's associated tasks
+  - Cancelling in Waiting/Running states is accepted, and aborts the flow, and it's associated tasks
   - Cancelling in Waiting/Running states also automatically pauses flow configuration

 ## [0.162.1] - 2024-02-28
@@ -142,7 +142,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [0.157.0] - 2024-02-12
 ### Added
-- Complete support for `arm64` architecture (including M-series Apple silicon)
+- Complete support for `arm64` architecture (including M-series Apple Silicon)
 - `kamu-cli` now depends on multi-platform Datafusion, Spark, Flink, and Jupyter images allowing you to run data processing at native CPU speeds
 ### Changed
 - Spark engine is upgraded to latest version of Spark 3.5
@@ -152,7 +152,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [0.156.3] - 2024-02-09
 ### Added
-- Native support for `arm64` architecture (including M-series Apple silicon) in `kamu-cli` and `kamu-engine-datafusion`
+- Native support for `arm64` architecture (including M-series Apple Silicon) in `kamu-cli` and `kamu-engine-datafusion`
 - Note: Flink and Spark engine images still don't provide `arm64` architecture and continue to require QEMU
 ### Changed
 - Flow system scheduling rules improved to respect system-wide throttling setting and take last successful run into account when rescheduling a flow or after a restart
"Note that if you just type `df` in a cell - you will get an error. That's because by default this kernel executes operations in the remore PySpark environment. To access `df` you need to use `%%local` cell command which will execute code in this local Python kernel.\n",
527
+
"Note that if you just type `df` in a cell - you will get an error. That's because by default this kernel executes operations in the remote PySpark environment. To access `df` you need to use `%%local` cell command which will execute code in this local Python kernel.\n",
images/demo/user-home/01 - Kamu Basics (COVID-19 example)/02 - Collaboration.ipynb (+1 -1)
@@ -59,7 +59,7 @@
     "- Decentralized storage like IPFS, Arweave (see next tutorial on \"Web3 Data\")\n",
     "- Or even some old FTP server (see [full list](https://docs.kamu.dev/node/deploy/storage/))\n",
     "\n",
-    "As a reporitory for this demo we will use [**Kamu Node**](https://docs.kamu.dev/node/) - you can think of it as a small server on top of some storage (AWS S3 or Minio in this case) that speaks ORF protocol and provides a bunch of cool additional features, like highly optimized uploads/downloads, dataset search, and even executing remote SQL queries.\n",
+    "As a repository for this demo we will use [**Kamu Node**](https://docs.kamu.dev/node/) - you can think of it as a small server on top of some storage (AWS S3 or Minio in this case) that speaks ORF protocol and provides a bunch of cool additional features, like highly optimized uploads/downloads, dataset search, and even executing remote SQL queries.\n",
resources/cli-reference.md (+5 -5)
@@ -653,7 +653,7 @@ Push local data into a repository

 Use this command to share your new dataset or new data with others. All changes performed by this command are atomic and non-destructive. This command will analyze the state of the dataset at the repository and will only upload data and metadata that wasn't previously seen.

-Similarly to git, if someone else modified the dataset concurrently with you - your push will be rejected and you will have to resolve the conflict.
+Similarly to git, if someone else modified the dataset concurrently with you - your push will be rejected, and you will have to resolve the conflict.

 **Examples:**
@@ -695,7 +695,7 @@ Use this command to rename a dataset in your local workspace. Renaming is safe i

 **Examples:**

-Renaming is often useful when you pull a remote dataset by URL and it gets auto-assigned not the most convenient name:
+Renaming is often useful when you pull a remote dataset by URL, and it gets auto-assigned not the most convenient name:

     kamu pull ipfs://bafy...a0da
     kamu rename bafy...a0da my.dataset
@@ -827,7 +827,7 @@ Manage set of remote aliases associated with datasets
 * `add` — Adds a remote alias to a dataset
 * `delete` — Deletes a remote alias associated with a dataset

-When you pull and push datasets from repositories kamu uses aliases to let you avoid specifying the full remote referente each time. Aliases are usually created the first time you do a push or pull and saved for later. If you have an unusual setup (e.g. pushing to multiple repositories) you can use this command to manage the aliases.
+When you pull and push datasets from repositories kamu uses aliases to let you avoid specifying the full remote reference each time. Aliases are usually created the first time you do a push or pull and saved for later. If you have an unusual setup (e.g. pushing to multiple repositories) you can use this command to manage the aliases.

 **Examples:**
@@ -947,7 +947,7 @@ Executes an SQL query or drops you into an SQL shell

 **Options:**

-* `--url <URL>` — URL of a running JDBC server (e.g jdbc:hive2://example.com:10000)
+* `--url <URL>` — URL of a running JDBC server (e.g. jdbc:hive2://example.com:10000)
 * `-c`, `--command <CMD>` — SQL command to run
 * `--script <FILE>` — SQL script file to execute
 * `--engine <ENG>` — Engine type to use for this SQL session
@@ -1201,7 +1201,7 @@ There are two types of compactions: soft and hard.

 Soft compactions produce new files while leaving the old blocks intact. This allows for faster queries, while still preserving the accurate history of how dataset evolved over time.

-Hard compactions rewrite the history of the dataset as if data was originally written in big batches. They allow to shrink the history of a dataset to just a few blocks, reclaim the space used by old data files, but at the expense of history loss. Hard compactions will rewrite the metadata chain, changing block hashes. Therefore they will **break all downstream datasets** that depend on them.
+Hard compactions rewrite the history of the dataset as if data was originally written in big batches. They allow to shrink the history of a dataset to just a few blocks, reclaim the space used by old data files, but at the expense of history loss. Hard compactions will rewrite the metadata chain, changing block hashes. Therefore, they will **break all downstream datasets** that depend on them.
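Putting the `kamu sql` options documented in the hunk above together, one possible invocation (using the documentation's own example server URL; the query itself is a generic placeholder):

    kamu sql --url jdbc:hive2://example.com:10000 --command 'SELECT 1'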
src/app/cli/src/cli_parser.rs (+5 -5)
@@ -805,7 +805,7 @@ pub fn cli() -> Command {
 r#"
 Use this command to share your new dataset or new data with others. All changes performed by this command are atomic and non-destructive. This command will analyze the state of the dataset at the repository and will only upload data and metadata that wasn't previously seen.

-Similarly to git, if someone else modified the dataset concurrently with you - your push will be rejected and you will have to resolve the conflict.
+Similarly to git, if someone else modified the dataset concurrently with you - your push will be rejected, and you will have to resolve the conflict.

 **Examples:**
@@ -850,7 +850,7 @@ pub fn cli() -> Command {

 **Examples:**

-Renaming is often useful when you pull a remote dataset by URL and it gets auto-assigned not the most convenient name:
+Renaming is often useful when you pull a remote dataset by URL, and it gets auto-assigned not the most convenient name:

     kamu pull ipfs://bafy...a0da
     kamu rename bafy...a0da my.dataset
@@ -1012,7 +1012,7 @@ pub fn cli() -> Command {
 ])
 .after_help(indoc::indoc!(
 r#"
-When you pull and push datasets from repositories kamu uses aliases to let you avoid specifying the full remote referente each time. Aliases are usually created the first time you do a push or pull and saved for later. If you have an unusual setup (e.g. pushing to multiple repositories) you can use this command to manage the aliases.
+When you pull and push datasets from repositories kamu uses aliases to let you avoid specifying the full remote reference each time. Aliases are usually created the first time you do a push or pull and saved for later. If you have an unusual setup (e.g. pushing to multiple repositories) you can use this command to manage the aliases.

 **Examples:**
@@ -1108,7 +1108,7 @@ pub fn cli() -> Command {
 Arg::new("url")
     .long("url")
     .value_name("URL")
-    .help("URL of a running JDBC server (e.g jdbc:hive2://example.com:10000)"),
+    .help("URL of a running JDBC server (e.g. jdbc:hive2://example.com:10000)"),
 Arg::new("command")
     .short('c')
     .long("command")
@@ -1291,7 +1291,7 @@ pub fn cli() -> Command {

 Soft compactions produce new files while leaving the old blocks intact. This allows for faster queries, while still preserving the accurate history of how dataset evolved over time.

-Hard compactions rewrite the history of the dataset as if data was originally written in big batches. They allow to shrink the history of a dataset to just a few blocks, reclaim the space used by old data files, but at the expense of history loss. Hard compactions will rewrite the metadata chain, changing block hashes. Therefore they will **break all downstream datasets** that depend on them.
+Hard compactions rewrite the history of the dataset as if data was originally written in big batches. They allow to shrink the history of a dataset to just a few blocks, reclaim the space used by old data files, but at the expense of history loss. Hard compactions will rewrite the metadata chain, changing block hashes. Therefore, they will **break all downstream datasets** that depend on them.
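For context on the pattern these `cli_parser.rs` hunks touch: help strings are attached to `clap` arguments, and multi-line after-help text goes through `indoc::indoc!` to strip leading indentation. Below is a minimal, self-contained sketch of that pattern; the argument names mirror the diff, but the surrounding assembly is illustrative rather than the file's actual structure:

    use clap::{Arg, Command};

    // Stripped-down sketch of how the `sql` subcommand wires up its `--url`
    // option and after-help text, mirroring the hunks above.
    fn sql_command() -> Command {
        Command::new("sql")
            .about("Executes an SQL query or drops you into an SQL shell")
            .args([
                Arg::new("url")
                    .long("url")
                    .value_name("URL")
                    // The typo fix from the diff: "e.g" -> "e.g."
                    .help("URL of a running JDBC server (e.g. jdbc:hive2://example.com:10000)"),
                Arg::new("command")
                    .short('c')
                    .long("command")
                    .value_name("CMD")
                    .help("SQL command to run"),
            ])
            .after_help(indoc::indoc!(
                r#"
                **Examples:**

                kamu sql --url jdbc:hive2://example.com:10000 -c 'SELECT 1'
                "#
            ))
    }

    fn main() {
        // Print the generated help text to verify the strings render as expected.
        sql_command().print_help().expect("failed to print help");
    }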