refactor!: replace ResourceId with sqlparser Idents in the `proof-of-sql` crate (#449)

Please be sure to look over the pull request guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr.

# Please go through the following checklist
- [x] The PR title and commit messages adhere to guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md.
In particular `!` is used if and only if at least one breaking change
has been introduced.
- [x] I have run the ci check script with `source
scripts/run_ci_checks.sh`.

# Rationale for this change
This PR replaces `proof_of_sql_parser::ResourceId` with `sqlparser::ast::ObjectName` in the
`proof-of-sql` crate, as part of the larger transition toward integrating the `sqlparser` crate.

This change is a subtask of issue #235, with the main goal of
streamlining the repository by switching to the `sqlparser` crate and
gradually replacing intermediary constructs like
`proof_of_sql_parser::intermediate_ast` with `sqlparser::ast`.
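
For illustration, here is a minimal sketch (assuming the tuple-struct form of `ObjectName` exposed by the `sqlparser` version this crate depends on) of how a dotted `ResourceId` such as `albums.collection` corresponds to an `ObjectName` built from `Ident`s:

```rust
use sqlparser::ast::{Ident, ObjectName};

fn main() {
    // A `ResourceId` such as "albums.collection" maps onto an `ObjectName`
    // whose parts are plain (unquoted) identifiers.
    let name = ObjectName(vec![Ident::new("albums"), Ident::new("collection")]);
    assert_eq!(name.to_string(), "albums.collection");
}
```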

# What changes are included in this PR?
- All instances of `proof_of_sql_parser::ResourceId` have been replaced with
`sqlparser::ast::ObjectName`.
- A few places (e.g. `Select`) still depend on `ResourceId`; those will be migrated when the
`Expr`s are refactored.
- Every usage of `ResourceId` has been updated to preserve the original functionality, with no
changes to logic or behavior.
- The breaking change is that `ObjectName` does not implement the `Copy` trait, so a few
`clone()` calls were added in places where values are moved (see the sketch below).
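
A minimal sketch of the call-site impact (a hypothetical snippet rather than code lifted from the diff; the `sketch` function and the exact import paths are assumptions): table references are now built with `TableRef::new` instead of parsing a dotted string, `MetadataAccessor` methods borrow `&TableRef`, and an explicit `clone()` is needed where the reference is moved more than once.

```rust
use proof_of_sql::{
    base::database::{
        owned_table_utility::{bigint, owned_table},
        MetadataAccessor, OwnedTableTestAccessor, TableRef, TestAccessor,
    },
    proof_primitive::dory::DynamicDoryEvaluationProof,
};

fn sketch(accessor: &mut OwnedTableTestAccessor<DynamicDoryEvaluationProof>) {
    // Before this PR: `let table_ref: ResourceId = "albums.collection".parse().unwrap();`
    // After: construct the `TableRef` from schema and table identifiers.
    let table_ref = TableRef::new("albums", "collection");

    // `ObjectName` (and therefore `TableRef`) is not `Copy`, so moving it into
    // `add_table` requires a clone when the reference is still needed afterwards.
    accessor.add_table(
        table_ref.clone(),
        owned_table([bigint("a", [1_i64, 2, 3])]),
        0,
    );

    // `MetadataAccessor` methods now take the table reference by borrow.
    let _len = accessor.get_length(&table_ref);
}
```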


# Are these changes tested?

Yes

Part of #235 
Fixes #352
iajoiner authored Feb 7, 2025
2 parents 4901093 + 546b460 commit fbc396e
Showing 81 changed files with 1,984 additions and 1,740 deletions.
16 changes: 9 additions & 7 deletions crates/proof-of-sql/benches/scaffold/benchmark_accessor.rs
@@ -27,7 +27,7 @@ impl<'a, C: Commitment> BenchmarkAccessor<'a, C> {
setup: &C::PublicSetup<'_>,
) {
self.table_schemas.insert(
table_ref,
table_ref.clone(),
columns
.iter()
.map(|(id, col)| (id.clone(), col.column_type()))
@@ -44,15 +44,17 @@ impl<'a, C: Commitment> BenchmarkAccessor<'a, C> {
let mut length = None;
for (column, commitment) in columns.iter().zip(commitments) {
self.columns.insert(
ColumnRef::new(table_ref, column.0.clone(), column.1.column_type()),
ColumnRef::new(table_ref.clone(), column.0.clone(), column.1.column_type()),
column.1,
);
self.commitments.insert(
ColumnRef::new(table_ref, column.0.clone(), column.1.column_type()),
ColumnRef::new(table_ref.clone(), column.0.clone(), column.1.column_type()),
commitment,
);
self.column_types
.insert((table_ref, column.0.clone()), column.1.column_type());
self.column_types.insert(
(table_ref.clone(), column.0.clone()),
column.1.column_type(),
);

if let Some(len) = length {
assert!(len == column.1.len());
@@ -76,10 +78,10 @@ impl<C: Commitment> MetadataAccessor for BenchmarkAccessor<'_, C> {
/// # Panics
///
/// Will panic if the table reference does not exist in the lengths map.
fn get_length(&self, table_ref: TableRef) -> usize {
fn get_length(&self, table_ref: &TableRef) -> usize {
*self.lengths.get(&table_ref).unwrap()
}
fn get_offset(&self, _table_ref: TableRef) -> usize {
fn get_offset(&self, _table_ref: &TableRef) -> usize {
0
}
}
1 change: 0 additions & 1 deletion crates/proof-of-sql/benches/scaffold/mod.rs
@@ -11,7 +11,6 @@ use benchmark_accessor::BenchmarkAccessor;
pub mod querys;
mod random_util;
use random_util::{generate_random_columns, OptionalRandBound};

/// # Panics
///
/// Will panic if:
4 changes: 2 additions & 2 deletions crates/proof-of-sql/examples/albums/main.rs
@@ -9,7 +9,7 @@ use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -87,7 +87,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"albums.collection".parse().unwrap(),
TableRef::new("albums", "collection"),
OwnedTable::try_from(albums_batch).unwrap(),
0,
);
4 changes: 2 additions & 2 deletions crates/proof-of-sql/examples/avocado-prices/main.rs
@@ -6,7 +6,7 @@
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{OwnedTable, OwnedTableTestAccessor},
base::database::{OwnedTable, OwnedTableTestAccessor, TableRef},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
},
@@ -88,7 +88,7 @@ fn main() {

// Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
let accessor = OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_from_table(
"avocado.prices".parse().unwrap(),
TableRef::new("avocado", "prices"),
OwnedTable::try_from(data_batch).unwrap(),
0,
&prover_setup,
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/books/main.rs
@@ -3,13 +3,12 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -88,7 +87,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"books.books".parse().unwrap(),
TableRef::new("books", "books"),
OwnedTable::try_from(books_batch).unwrap(),
0,
);
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/brands/main.rs
@@ -3,13 +3,12 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example brands --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -88,7 +87,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"brands.global_brands".parse().unwrap(),
TableRef::new("brands", "global_brands"),
OwnedTable::try_from(brands_batch).unwrap(),
0,
);
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/census/main.rs
@@ -9,7 +9,7 @@
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{OwnedTable, OwnedTableTestAccessor},
base::database::{OwnedTable, OwnedTableTestAccessor, TableRef},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
},
@@ -19,7 +19,6 @@ use proof_of_sql::{
};
use rand::{rngs::StdRng, SeedableRng};
use std::{fs::File, time::Instant};

// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS.
// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`.
// For a sampling:
@@ -91,7 +90,7 @@ fn main() {

// Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
let accessor = OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_from_table(
"census.income".parse().unwrap(),
TableRef::new("census", "income"),
OwnedTable::try_from(census_income_batch).unwrap(),
0,
&prover_setup,
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/countries/main.rs
@@ -3,13 +3,12 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example countries --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -89,7 +88,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"countries.countries".parse().unwrap(),
TableRef::new("countries", "countries"),
OwnedTable::try_from(countries_batch).unwrap(),
0,
);
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/dinosaurs/main.rs
@@ -3,13 +3,12 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example dinosaurs --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -89,7 +88,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"dinosaurs.dinosaurs".parse().unwrap(),
TableRef::new("dinosaurs", "dinosaurs"),
OwnedTable::try_from(dinosaurs_batch).unwrap(),
0,
);
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/dog_breeds/main.rs
@@ -3,11 +3,10 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example dog_breeds --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{OwnedTable, OwnedTableTestAccessor, TestAccessor},
base::database::{OwnedTable, OwnedTableTestAccessor, TableRef, TestAccessor},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
},
@@ -84,7 +83,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"dog_breeds.breeds".parse().unwrap(),
TableRef::new("dog_breeds", "breeds"),
OwnedTable::try_from(dog_breeds_batch).unwrap(),
0,
);
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/hello_world/main.rs
@@ -3,7 +3,7 @@ use ark_std::test_rng;
use proof_of_sql::{
base::database::{
owned_table_utility::{bigint, owned_table, varchar},
OwnedTableTestAccessor, TestAccessor,
OwnedTableTestAccessor, TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -14,7 +14,6 @@ use std::{
io::{stdout, Write},
time::Instant,
};

/// # Panics
///
/// Will panic if flushing the output fails, which can happen due to issues with the underlying output stream.
@@ -53,7 +52,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"sxt.table".parse().unwrap(),
TableRef::new("sxt", "table"),
owned_table([
bigint("a", [1, 2, 3, 2]),
varchar("b", ["hi", "hello", "there", "world"]),
5 changes: 2 additions & 3 deletions crates/proof-of-sql/examples/plastics/main.rs
@@ -3,13 +3,12 @@
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example plastics --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
TableRef, TestAccessor,
},
proof_primitive::dory::{
DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup,
@@ -87,7 +86,7 @@ fn main() {
let mut accessor =
OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
accessor.add_table(
"plastics.types".parse().unwrap(),
TableRef::new("plastics", "types"),
OwnedTable::try_from(plastics_batch).unwrap(),
0,
);
8 changes: 4 additions & 4 deletions crates/proof-of-sql/examples/posql_db/commit_accessor.rs
@@ -26,10 +26,10 @@ impl<C: Commitment + Serialize + for<'a> Deserialize<'a>> CommitAccessor<C> {
fs::write(path, postcard::to_allocvec(commit)?)?;
Ok(())
}
pub fn load_commit(&mut self, table_ref: TableRef) -> Result<(), Box<dyn Error>> {
pub fn load_commit(&mut self, table_ref: &TableRef) -> Result<(), Box<dyn Error>> {
let path = self.base_path.join(format!("{table_ref}.commit"));
let commit = postcard::from_bytes(&fs::read(path)?)?;
self.inner.insert(table_ref, commit);
self.inner.insert(table_ref.clone(), commit);
Ok(())
}
pub fn get_commit(&self, table_ref: &TableRef) -> Option<&TableCommitment<C>> {
@@ -43,11 +43,11 @@ impl<C: Commitment> CommitmentAccessor<C> for CommitAccessor<C> {
}
}
impl<C: Commitment> MetadataAccessor for CommitAccessor<C> {
fn get_length(&self, table_ref: proof_of_sql::base::database::TableRef) -> usize {
fn get_length(&self, table_ref: &proof_of_sql::base::database::TableRef) -> usize {
self.inner.get_length(table_ref)
}

fn get_offset(&self, table_ref: proof_of_sql::base::database::TableRef) -> usize {
fn get_offset(&self, table_ref: &proof_of_sql::base::database::TableRef) -> usize {
self.inner.get_offset(table_ref)
}
}
4 changes: 2 additions & 2 deletions crates/proof-of-sql/examples/posql_db/csv_accessor.rs
@@ -85,10 +85,10 @@ impl<S: Scalar> DataAccessor<S> for CsvDataAccessor {
}
}
impl MetadataAccessor for CsvDataAccessor {
fn get_length(&self, table_ref: TableRef) -> usize {
fn get_length(&self, table_ref: &TableRef) -> usize {
self.inner.get_length(table_ref)
}
fn get_offset(&self, table_ref: TableRef) -> usize {
fn get_offset(&self, table_ref: &TableRef) -> usize {
self.inner.get_offset(table_ref)
}
}
18 changes: 9 additions & 9 deletions crates/proof-of-sql/examples/posql_db/main.rs
@@ -199,20 +199,20 @@ fn main() {
CommitAccessor::<DynamicDoryCommitment>::new(PathBuf::from(args.path.clone()));
let csv_accessor = CsvDataAccessor::new(PathBuf::from(args.path));
commit_accessor
.load_commit(table_name)
.load_commit(&table_name)
.expect("Failed to load commit");
let mut table_commitment = commit_accessor.get_commit(&table_name).unwrap().clone();
let schema = Schema::new(
commit_accessor
.lookup_schema(table_name)
.lookup_schema(table_name.clone())
.iter()
.map(|(i, t)| Field::new(i.value.as_str(), t.into(), false))
.collect::<Vec<_>>(),
);
let append_batch =
read_record_batch_from_csv(schema, &file_path).expect("Failed to read csv file.");
csv_accessor
.append_batch(&table_name, &append_batch)
.append_batch(&table_name.clone(), &append_batch)
.expect("Failed to write batch");
let timer = start_timer("Updating Commitment");
table_commitment
@@ -228,19 +228,19 @@ fn main() {
CommitAccessor::<DynamicDoryCommitment>::new(PathBuf::from(args.path.clone()));
let mut csv_accessor = CsvDataAccessor::new(PathBuf::from(args.path.clone()));
let tables = query.get_table_references("example".parse().unwrap());
for table in tables.into_iter().map(TableRef::new) {
for table in tables.into_iter().map(Into::into) {
commit_accessor
.load_commit(table)
.load_commit(&table)
.expect("Failed to load commit");
let schema = Schema::new(
commit_accessor
.lookup_schema(table)
.lookup_schema(table.clone())
.iter()
.map(|(i, t)| Field::new(i.value.as_str(), t.into(), false))
.collect::<Vec<_>>(),
);
csv_accessor
.load_table(table, schema)
.load_table(table.clone(), schema)
.expect("Failed to load table");
}
let query = QueryExpr::try_new(query, "example".into(), &commit_accessor).unwrap();
@@ -262,9 +262,9 @@ fn main() {
CommitAccessor::<DynamicDoryCommitment>::new(PathBuf::from(args.path.clone()));
let table_refs = query.get_table_references("example".parse().unwrap());
for table_ref in table_refs {
let table_name = TableRef::new(table_ref);
let table_name: TableRef = table_ref.into();
commit_accessor
.load_commit(table_name)
.load_commit(&table_name)
.expect("Failed to load commit");
}
let query = QueryExpr::try_new(query, "example".into(), &commit_accessor).unwrap();
@@ -44,14 +44,14 @@ impl<S: Scalar> DataAccessor<S> for RecordBatchAccessor {
}
}
impl MetadataAccessor for RecordBatchAccessor {
fn get_length(&self, table_ref: TableRef) -> usize {
fn get_length(&self, table_ref: &TableRef) -> usize {
self.tables
.get(&table_ref)
.expect("Table not found.")
.num_rows()
}

fn get_offset(&self, table_ref: TableRef) -> usize {
fn get_offset(&self, table_ref: &TableRef) -> usize {
assert!(self.tables.contains_key(&table_ref), "Table not found.");
0
}