Skip to content

Commit

Permalink
chore: Move table func trait (#2208)
Browse files Browse the repository at this point in the history
Moves it close to where it's implemented. The 'builtins' file is already
a bit big.
  • Loading branch information
scsmithr authored Dec 5, 2023
1 parent 6cfa754 commit dd05d83
Show file tree
Hide file tree
Showing 21 changed files with 166 additions and 148 deletions.
1 change: 1 addition & 0 deletions crates/metastore/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,7 @@ impl BuiltinCatalog {

oid += 1;
}

for func in BUILTIN_FUNCS.iter_funcs() {
// Put them all in the default schema.
let schema_id = schema_names
Expand Down
117 changes: 3 additions & 114 deletions crates/sqlbuiltins/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,11 @@
//! database node will be able to see it, but will not be able to execute
//! appropriately. We can revisit this if this isn't acceptable long-term.

use async_trait::async_trait;
use datafusion::{
arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema},
datasource::TableProvider,
logical_expr::Signature,
};
use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider};
use datafusion::arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema};
use once_cell::sync::Lazy;
use pgrepr::oid::FIRST_GLAREDB_BUILTIN_ID;
use protogen::metastore::types::{
catalog::{EntryMeta, EntryType, FunctionEntry, FunctionType, RuntimePreference},
options::InternalColumnDefinition,
};
use std::{collections::HashMap, sync::Arc};
use protogen::metastore::types::options::InternalColumnDefinition;
use std::sync::Arc;

/// The default catalog that exists in all GlareDB databases.
pub const DEFAULT_CATALOG: &str = "default";
Expand Down Expand Up @@ -612,108 +603,6 @@ impl BuiltinView {
}
}

/// A builtin table function.
///
/// Table functions are the ones that appear in the FROM clause,
/// e.g. `SELECT * FROM my_table_func(...)`.
#[async_trait]
pub trait TableFunc: Sync + Send + BuiltinFunction {
    /// The runtime preference this function reports for itself.
    fn runtime_preference(&self) -> RuntimePreference;

    /// Determine the runtime preference for one particular invocation.
    ///
    /// The default implementation ignores both the call arguments and the
    /// parent preference and returns whatever `runtime_preference` reports.
    fn detect_runtime(
        &self,
        _args: &[FuncParamValue],
        _parent: RuntimePreference,
    ) -> datafusion_ext::errors::Result<RuntimePreference> {
        let preference = self.runtime_preference();
        Ok(preference)
    }

    /// Build a table provider from the positional `args` and named `opts`.
    async fn create_provider(
        &self,
        ctx: &dyn TableFuncContextProvider,
        args: Vec<FuncParamValue>,
        opts: HashMap<String, FuncParamValue>,
    ) -> datafusion_ext::errors::Result<Arc<dyn TableProvider>>;
}

/// The same as `BuiltinFunction`, but with the metadata supplied as
/// associated consts instead of methods.
pub trait ConstBuiltinFunction: Sync + Send {
    /// Name of the function.
    const NAME: &'static str;
    /// Description of the function.
    const DESCRIPTION: &'static str;
    /// SQL example for the function.
    const EXAMPLE: &'static str;
    /// Type of the function.
    const FUNCTION_TYPE: FunctionType;

    /// Return the signature for this function.
    /// Defaults to None.
    fn signature(&self) -> Option<Signature> {
        Option::None
    }
}

impl<T> BuiltinFunction for T
where
T: ConstBuiltinFunction,
{
fn name(&self) -> &'static str {
Self::NAME
}
fn sql_example(&self) -> Option<String> {
Some(Self::EXAMPLE.to_string())
}
fn description(&self) -> Option<String> {
Some(Self::DESCRIPTION.to_string())
}
fn function_type(&self) -> FunctionType {
Self::FUNCTION_TYPE
}
fn signature(&self) -> Option<Signature> {
self.signature()
}
}
/// A builtin function.
///
/// Every builtin function implements this trait.
pub trait BuiltinFunction: Sync + Send {
    /// The name for this function. Function implementations are looked up
    /// by this name.
    fn name(&self) -> &'static str;

    /// The signature for this function, if one is available.
    ///
    /// Defaults to None.
    // TODO: Remove the default impl once we have `signature` implemented for all functions
    fn signature(&self) -> Option<Signature> {
        Option::None
    }

    /// A sql example for this function, if one is available.
    ///
    /// Defaults to None.
    fn sql_example(&self) -> Option<String> {
        Option::None
    }

    /// A description for this function, if one is available.
    ///
    /// Defaults to None.
    fn description(&self) -> Option<String> {
        Option::None
    }

    /// The function type: 'aggregate', 'scalar', or 'table'.
    fn function_type(&self) -> FunctionType;

    /// Convert to a builtin `FunctionEntry` with the given `id` and `parent`.
    ///
    /// The entry is flagged as builtin, non-external and non-temporary, and
    /// its runtime preference is left as `Unspecified`.
    fn as_function_entry(&self, id: u32, parent: u32) -> FunctionEntry {
        FunctionEntry {
            meta: EntryMeta {
                entry_type: EntryType::Function,
                id,
                parent,
                name: self.name().to_owned(),
                builtin: true,
                external: false,
                is_temp: false,
                sql_example: self.sql_example(),
                description: self.description(),
            },
            func_type: self.function_type(),
            runtime_preference: RuntimePreference::Unspecified,
            signature: self.signature(),
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion crates/sqlbuiltins/src/functions/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// `Abs` would otherwise be `Abs` instead of `abs`. and so on.
#![allow(non_camel_case_types)]

use crate::{builtins::BuiltinFunction, document};
use crate::{document, functions::BuiltinFunction};
use datafusion::logical_expr::AggregateFunction;
use protogen::metastore::types::catalog::FunctionType;

Expand Down
98 changes: 93 additions & 5 deletions crates/sqlbuiltins/src/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,108 @@ mod aggregates;
mod scalars;
mod table;

use std::collections::HashMap;

use crate::builtins::BuiltinFunction;

use self::scalars::ArrowCastFunction;
use self::table::BuiltinTableFuncs;
use datafusion::logical_expr::{AggregateFunction, BuiltinScalarFunction};
use datafusion::logical_expr::{AggregateFunction, BuiltinScalarFunction, Signature};
use once_cell::sync::Lazy;
use protogen::metastore::types::catalog::{
EntryMeta, EntryType, FunctionEntry, FunctionType, RuntimePreference,
};
use std::collections::HashMap;
use std::sync::Arc;

/// Builtin table returning functions available for all sessions.
///
/// Constructed lazily through `BuiltinTableFuncs::new`.
pub static BUILTIN_TABLE_FUNCS: Lazy<BuiltinTableFuncs> = Lazy::new(BuiltinTableFuncs::new);
/// Lazily constructed shared instance of `ArrowCastFunction`.
pub static ARROW_CAST_FUNC: Lazy<ArrowCastFunction> = Lazy::new(|| ArrowCastFunction {});
/// Lazily constructed `BuiltinFuncs` collection, built through `BuiltinFuncs::new`.
pub static BUILTIN_FUNCS: Lazy<BuiltinFuncs> = Lazy::new(BuiltinFuncs::new);

/// A builtin function.
///
/// Every builtin function implements this trait.
pub trait BuiltinFunction: Sync + Send {
    /// The name for this function. Function implementations are looked up
    /// by this name.
    fn name(&self) -> &str;

    /// The signature for this function, if one is available.
    ///
    /// Defaults to None.
    // TODO: Remove the default impl once we have `signature` implemented for all functions
    fn signature(&self) -> Option<Signature> {
        Option::None
    }

    /// A sql example for this function, if one is available.
    ///
    /// Defaults to None.
    fn sql_example(&self) -> Option<String> {
        Option::None
    }

    /// A description for this function, if one is available.
    ///
    /// Defaults to None.
    fn description(&self) -> Option<String> {
        Option::None
    }

    /// The function type: 'aggregate', 'scalar', or 'table'.
    fn function_type(&self) -> FunctionType;

    /// Convert to a builtin `FunctionEntry` with the given `id` and `parent`.
    ///
    /// This default always leaves the runtime preference as `Unspecified`,
    /// which is suitable for aggregates and scalars. Table functions need to
    /// set the runtime preference manually.
    fn as_function_entry(&self, id: u32, parent: u32) -> FunctionEntry {
        FunctionEntry {
            meta: EntryMeta {
                entry_type: EntryType::Function,
                id,
                parent,
                name: self.name().to_owned(),
                builtin: true,
                external: false,
                is_temp: false,
                sql_example: self.sql_example(),
                description: self.description(),
            },
            func_type: self.function_type(),
            runtime_preference: RuntimePreference::Unspecified,
            signature: self.signature(),
        }
    }
}

/// The same as `BuiltinFunction`, but with the metadata supplied as
/// associated consts instead of methods.
pub trait ConstBuiltinFunction: Sync + Send {
    /// Name of the function.
    const NAME: &'static str;
    /// Description of the function.
    const DESCRIPTION: &'static str;
    /// SQL example for the function.
    const EXAMPLE: &'static str;
    /// Type of the function.
    const FUNCTION_TYPE: FunctionType;

    /// Return the signature for this function.
    /// Defaults to None.
    fn signature(&self) -> Option<Signature> {
        Option::None
    }
}

impl<T> BuiltinFunction for T
where
T: ConstBuiltinFunction,
{
fn name(&self) -> &str {
Self::NAME
}
fn sql_example(&self) -> Option<String> {
Some(Self::EXAMPLE.to_string())
}
fn description(&self) -> Option<String> {
Some(Self::DESCRIPTION.to_string())
}
fn function_type(&self) -> FunctionType {
Self::FUNCTION_TYPE
}
fn signature(&self) -> Option<Signature> {
self.signature()
}
}

/// A collection of builtin functions, stored as shared trait objects.
// NOTE(review): the `String` keys are presumably function names — confirm
// against `BuiltinFuncs::new`, which is not visible here.
pub struct BuiltinFuncs {
// Each function is held as an `Arc<dyn BuiltinFunction>` under a `String` key.
funcs: HashMap<String, Arc<dyn BuiltinFunction>>,
}
Expand Down
2 changes: 1 addition & 1 deletion crates/sqlbuiltins/src/functions/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#![allow(non_camel_case_types)]

use crate::{
builtins::{BuiltinFunction, ConstBuiltinFunction},
document,
functions::{BuiltinFunction, ConstBuiltinFunction},
};
use datafusion::logical_expr::BuiltinScalarFunction;
use protogen::metastore::types::catalog::FunctionType;
Expand Down
3 changes: 2 additions & 1 deletion crates/sqlbuiltins/src/functions/table/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider};
use datasources::bigquery::{BigQueryAccessor, BigQueryTableAccess};
use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};

use crate::builtins::{ConstBuiltinFunction, TableFunc};
use super::TableFunc;
use crate::functions::ConstBuiltinFunction;

#[derive(Debug, Clone, Copy)]
pub struct ReadBigQuery;
Expand Down
4 changes: 3 additions & 1 deletion crates/sqlbuiltins/src/functions/table/delta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ use std::collections::HashMap;
use std::sync::Arc;

use super::table_location_and_opts;
use crate::builtins::{ConstBuiltinFunction, TableFunc};
use async_trait::async_trait;
use datafusion::datasource::TableProvider;
use datafusion_ext::errors::{ExtensionError, Result};
use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider};
use datasources::lake::delta::access::load_table_direct;
use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};

use super::TableFunc;
use crate::functions::ConstBuiltinFunction;

/// Function for scanning delta tables.
///
/// Note that this is separate from the other object store functions since
Expand Down
10 changes: 4 additions & 6 deletions crates/sqlbuiltins/src/functions/table/excel.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
use std::collections::HashMap;
use std::sync::Arc;

use async_trait::async_trait;
use datafusion::datasource::TableProvider;
use datafusion_ext::errors::{ExtensionError, Result};
Expand All @@ -9,10 +6,11 @@ use datasources::common::url::DatasourceUrl;
use datasources::excel::read_excel_impl;
use ioutil::resolve_path;
use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};
use std::collections::HashMap;
use std::sync::Arc;

use crate::builtins::{ConstBuiltinFunction, TableFunc};

use super::table_location_and_opts;
use super::{table_location_and_opts, TableFunc};
use crate::functions::ConstBuiltinFunction;

#[derive(Debug, Clone, Copy)]
pub struct ExcelScan;
Expand Down
3 changes: 2 additions & 1 deletion crates/sqlbuiltins/src/functions/table/generate_series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ use futures::Stream;
use num_traits::Zero;
use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};

use crate::builtins::{ConstBuiltinFunction, TableFunc};
use super::TableFunc;
use crate::functions::ConstBuiltinFunction;

#[derive(Debug, Clone, Copy)]
pub struct GenerateSeries;
Expand Down
3 changes: 2 additions & 1 deletion crates/sqlbuiltins/src/functions/table/iceberg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use std::collections::HashMap;
use std::sync::Arc;

use super::table_location_and_opts;
use crate::builtins::TableFunc;
use async_trait::async_trait;
pub(crate) use data_files::*;
use datafusion::arrow::array::{Int32Builder, Int64Builder, StringBuilder, UInt64Builder};
Expand All @@ -21,6 +20,8 @@ use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};
pub(crate) use scan::*;
pub(crate) use snapshots::*;

use super::TableFunc;

fn box_err<E>(err: E) -> ExtensionError
where
E: std::error::Error + Send + Sync + 'static,
Expand Down
4 changes: 2 additions & 2 deletions crates/sqlbuiltins/src/functions/table/iceberg/data_files.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::builtins::ConstBuiltinFunction;

use super::*;

use crate::functions::ConstBuiltinFunction;

/// Scan data file metadata for the current snapshot of an iceberg table. Will
/// not attempt to read data files.
#[derive(Debug, Clone, Copy)]
Expand Down
4 changes: 2 additions & 2 deletions crates/sqlbuiltins/src/functions/table/iceberg/scan.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::builtins::ConstBuiltinFunction;

use super::*;

use crate::functions::ConstBuiltinFunction;

/// Scan an iceberg table.
#[derive(Debug, Clone, Copy)]
pub struct IcebergScan;
Expand Down
4 changes: 2 additions & 2 deletions crates/sqlbuiltins/src/functions/table/iceberg/snapshots.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::builtins::ConstBuiltinFunction;

use super::*;

use crate::functions::ConstBuiltinFunction;

/// Scan snapshot information for an iceberg table. Will not attempt to read
/// data files.
#[derive(Debug, Clone, Copy)]
Expand Down
4 changes: 3 additions & 1 deletion crates/sqlbuiltins/src/functions/table/lance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ use std::collections::HashMap;
use std::sync::Arc;

use super::table_location_and_opts;
use crate::builtins::{ConstBuiltinFunction, TableFunc};
use async_trait::async_trait;
use datafusion::datasource::TableProvider;
use datafusion_ext::errors::{ExtensionError, Result};
use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider};
use datasources::lance::scan_lance_table;
use protogen::metastore::types::catalog::{FunctionType, RuntimePreference};

use super::TableFunc;
use crate::functions::ConstBuiltinFunction;

/// Function for scanning lance tables.
///
/// Note that this is separate from the other object store functions since
Expand Down
Loading

0 comments on commit dd05d83

Please sign in to comment.