Skip to content

Commit

Permalink
Merge pull request #5073 from cadl/issue-5035
Browse files Browse the repository at this point in the history
feat: Add scalar function humanize
  • Loading branch information
mergify[bot] authored May 3, 2022
2 parents cbbc69a + 12feb6a commit f1b947a
Show file tree
Hide file tree
Showing 15 changed files with 592 additions and 177 deletions.
109 changes: 109 additions & 0 deletions common/functions/src/scalars/others/humanize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2022 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;
use std::marker::PhantomData;

use common_datavalues::prelude::*;
use common_datavalues::with_match_primitive_type_id;
use common_exception::Result;
use common_io::prelude::*;
use num_traits::AsPrimitive;

use crate::scalars::assert_numeric;
use crate::scalars::scalar_unary_op;
use crate::scalars::EvalContext;
use crate::scalars::Function;
use crate::scalars::FunctionDescription;
use crate::scalars::FunctionFeatures;

/// Scalar function that turns a numeric argument into human-readable text.
///
/// The formatting itself is delegated to the type parameter `T`; `eval` applies
/// `T::convert` to the values of the argument column.
#[derive(Clone)]
pub struct GenericHumanizeFunction<T> {
// Name reported by `name()` and by the `Display` implementation.
display_name: String,
// Zero-sized marker that ties this struct to its converter type `T`.
t: PhantomData<T>,
}

/// Conversion strategy used by `GenericHumanizeFunction`: maps one numeric
/// value to the bytes of its humanized representation.
pub trait HumanizeConvertFunction: Send + Sync + Clone + 'static {
/// Formats `v` (any value convertible to `f64` through `AsPrimitive`) and
/// returns the result as a byte vector.
///
/// `_ctx` is the evaluation context supplied by the caller; neither
/// implementation in this file reads it.
fn convert(v: impl AsPrimitive<f64>, _ctx: &mut EvalContext) -> Vec<u8>;
}

/// Construction and registration helpers for the humanize functions.
impl<T> GenericHumanizeFunction<T>
where T: HumanizeConvertFunction
{
    /// Builds a boxed function instance named `display_name`.
    ///
    /// # Errors
    /// Propagates the error from `assert_numeric` when the first argument type
    /// is not numeric.
    pub fn try_create(display_name: &str, args: &[&DataTypeImpl]) -> Result<Box<dyn Function>> {
        // Only the first argument is inspected; `desc()` declares exactly one.
        assert_numeric(args[0])?;

        let function = Self {
            display_name: String::from(display_name),
            t: PhantomData,
        };
        Ok(Box::new(function))
    }

    /// Describes the function for registration: created through
    /// `try_create`, deterministic, and taking exactly one argument.
    pub fn desc() -> FunctionDescription {
        let features = FunctionFeatures::default().deterministic().num_arguments(1);
        let creator = Box::new(Self::try_create);
        FunctionDescription::creator(creator).features(features)
    }
}

/// `Function` implementation: reports the name and return type, and evaluates
/// the argument column by applying `T::convert` through `scalar_unary_op`.
impl<T> Function for GenericHumanizeFunction<T>
where T: HumanizeConvertFunction
{
    /// Name this instance was created with.
    fn name(&self) -> &str {
        self.display_name.as_str()
    }

    /// The result column always has the data type of `Vu8`.
    fn return_type(&self) -> DataTypeImpl {
        Vu8::to_data_type()
    }

    /// Converts the first argument column into a column of humanized values.
    ///
    /// # Errors
    /// Propagates any error returned by `scalar_unary_op`.
    ///
    /// # Panics
    /// Hits `unreachable!()` if the column's type id is not one of the
    /// primitive types matched by `with_match_primitive_type_id!`.
    fn eval(
        &self,
        _func_ctx: crate::scalars::FunctionContext,
        columns: &common_datavalues::ColumnsWithField,
        _input_rows: usize,
    ) -> Result<common_datavalues::ColumnRef> {
        let input = &columns[0];
        with_match_primitive_type_id!(input.data_type().data_type_id(), |$F| {
            let mut ctx = EvalContext::default();
            let converted = scalar_unary_op::<$F, Vu8, _>(input.column(), T::convert, &mut ctx)?;
            Ok(converted.arc())
        }, {
            // `try_create` rejects non-numeric arguments via `assert_numeric`,
            // so presumably no other type id can reach this arm.
            unreachable!()
        })
    }
}

/// Displays the function as its bare display name.
impl<T> fmt::Display for GenericHumanizeFunction<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written verbatim: width/fill flags on the outer formatter are not applied.
        f.write_str(&self.display_name)
    }
}

/// Converter whose `HumanizeConvertFunction` implementation formats values
/// with `convert_byte_size`; it parameterizes `HumanizeSizeFunction`.
#[derive(Clone)]
pub struct HumanizeSizeConvertFunction;

/// Size formatting: delegates to `convert_byte_size`.
impl HumanizeConvertFunction for HumanizeSizeConvertFunction {
    /// Casts `v` to `f64`, formats it with `convert_byte_size`, and returns the
    /// resulting text as bytes. The evaluation context is unused.
    fn convert(v: impl AsPrimitive<f64>, _: &mut EvalContext) -> Vec<u8> {
        let size: f64 = v.as_();
        convert_byte_size(size).into()
    }
}

/// Converter whose `HumanizeConvertFunction` implementation formats values
/// with `convert_number_size`; it parameterizes `HumanizeNumberFunction`.
#[derive(Clone)]
pub struct HumanizeNumberConvertFunction;

/// Number formatting: delegates to `convert_number_size`.
impl HumanizeConvertFunction for HumanizeNumberConvertFunction {
    /// Casts `v` to `f64`, formats it with `convert_number_size`, and returns
    /// the resulting text as bytes. The evaluation context is unused.
    fn convert(v: impl AsPrimitive<f64>, _: &mut EvalContext) -> Vec<u8> {
        let number: f64 = v.as_();
        convert_number_size(number).into()
    }
}

/// `GenericHumanizeFunction` specialized with the byte-size converter.
pub type HumanizeSizeFunction = GenericHumanizeFunction<HumanizeSizeConvertFunction>;
/// `GenericHumanizeFunction` specialized with the number converter.
pub type HumanizeNumberFunction = GenericHumanizeFunction<HumanizeNumberConvertFunction>;
3 changes: 3 additions & 0 deletions common/functions/src/scalars/others/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

mod exists;
mod humanize;
mod ignore;
mod inet_aton;
mod inet_ntoa;
Expand All @@ -22,6 +23,8 @@ mod sleep;
mod type_of;

pub use exists::ExistsFunction;
pub use humanize::HumanizeNumberFunction;
pub use humanize::HumanizeSizeFunction;
pub use ignore::IgnoreFunction;
pub use inet_aton::InetAtonFunction;
pub use inet_aton::TryInetAtonFunction;
Expand Down
4 changes: 4 additions & 0 deletions common/functions/src/scalars/others/other.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use super::humanize::HumanizeNumberFunction;
use super::humanize::HumanizeSizeFunction;
use super::inet_aton::InetAtonFunction;
use super::inet_aton::TryInetAtonFunction;
use super::inet_ntoa::InetNtoaFunction;
Expand All @@ -34,6 +36,8 @@ impl OtherFunction {

factory.register("running_difference", RunningDifferenceFunction::desc());
factory.register("ignore", IgnoreFunction::desc());
factory.register("humanize_size", HumanizeSizeFunction::desc());
factory.register("humanize_number", HumanizeNumberFunction::desc());

// INET string to number.
factory.register("ipv4_string_to_num", InetAtonFunction::desc());
Expand Down
107 changes: 107 additions & 0 deletions common/functions/tests/it/scalars/others/humanize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2022 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_datavalues::prelude::*;
use common_exception::Result;

use crate::scalars::scalar_function_test::test_scalar_functions;
use crate::scalars::scalar_function_test::ScalarFunctionTest;

/// `humanize_size` on non-nullable columns: two numeric inputs that format
/// successfully and two non-numeric inputs that must be rejected.
#[test]
fn test_humanize_size_function() -> Result<()> {
    let unsigned_input = ScalarFunctionTest {
        name: "humanize_size(1024)",
        columns: vec![Series::from_data(vec![1024_u32])],
        expect: Series::from_data(vec!["1.00 KiB"]),
        error: "",
    };
    let negative_input = ScalarFunctionTest {
        name: "humanize_size(-1024)",
        columns: vec![Series::from_data(vec![-1024_i32])],
        expect: Series::from_data(vec!["-1.00 KiB"]),
        error: "",
    };
    // For the two rejection cases `expect` looks like a placeholder; the
    // harness presumably only checks `error` when it is non-empty.
    let string_input = ScalarFunctionTest {
        name: "humanize_size('abc')",
        columns: vec![Series::from_data(vec!["abc"])],
        expect: Series::from_data(vec!["-1 KiB"]),
        error: "Expected a numeric type, but got String",
    };
    let boolean_input = ScalarFunctionTest {
        name: "humanize_size(true)",
        columns: vec![Series::from_data(vec![true])],
        expect: Series::from_data(vec!["-1 KiB"]),
        error: "Expected a numeric type, but got Boolean",
    };

    let cases = vec![unsigned_input, negative_input, string_input, boolean_input];
    test_scalar_functions("humanize_size", &cases)
}

/// `humanize_size` on a nullable column: the value is formatted and the null
/// entry stays null.
#[test]
fn test_humanize_size_nullable() -> Result<()> {
    let nullable_input = ScalarFunctionTest {
        name: "humanize_size(null)",
        columns: vec![Series::from_data(vec![Some(1_048_576_i32), None])],
        expect: Series::from_data(vec![Some("1.00 MiB"), None]),
        error: "",
    };

    let cases = vec![nullable_input];
    test_scalar_functions("humanize_size", &cases)
}

/// `humanize_number` on non-nullable columns: two numeric inputs that format
/// successfully and two non-numeric inputs that must be rejected.
#[test]
fn test_humanize_number_function() -> Result<()> {
    let unsigned_input = ScalarFunctionTest {
        name: "humanize_number(1000)",
        columns: vec![Series::from_data(vec![1000_u32])],
        expect: Series::from_data(vec!["1 thousand"]),
        error: "",
    };
    let negative_input = ScalarFunctionTest {
        name: "humanize_number(-1000)",
        columns: vec![Series::from_data(vec![-1000_i32])],
        expect: Series::from_data(vec!["-1 thousand"]),
        error: "",
    };
    // For the two rejection cases `expect` looks like a placeholder; the
    // harness presumably only checks `error` when it is non-empty.
    let string_input = ScalarFunctionTest {
        name: "humanize_number('abc')",
        columns: vec![Series::from_data(vec!["abc"])],
        expect: Series::from_data(vec!["-1 thousand"]),
        error: "Expected a numeric type, but got String",
    };
    let boolean_input = ScalarFunctionTest {
        name: "humanize_number(true)",
        columns: vec![Series::from_data(vec![true])],
        expect: Series::from_data(vec!["-1 thousand"]),
        error: "Expected a numeric type, but got Boolean",
    };

    let cases = vec![unsigned_input, negative_input, string_input, boolean_input];
    test_scalar_functions("humanize_number", &cases)
}

/// `humanize_number` on a nullable column: the value is formatted and the
/// null entry stays null.
#[test]
fn test_humanize_number_nullable() -> Result<()> {
    let nullable_input = ScalarFunctionTest {
        name: "humanize_number(null)",
        columns: vec![Series::from_data(vec![Some(1_000_000_i32), None])],
        expect: Series::from_data(vec![Some("1 million"), None]),
        error: "",
    };

    let cases = vec![nullable_input];
    test_scalar_functions("humanize_number", &cases)
}
77 changes: 77 additions & 0 deletions common/functions/tests/it/scalars/others/inet_aton.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2022 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_datavalues::prelude::*;
use common_exception::Result;

use crate::scalars::scalar_function_test::test_scalar_functions;
use crate::scalars::scalar_function_test::ScalarFunctionTest;

/// `try_inet_aton`: a valid address parses to its numeric form, while invalid
/// or null input yields null and no error.
#[test]
fn test_try_inet_aton_function() -> Result<()> {
    let valid = ScalarFunctionTest {
        name: "valid input",
        columns: vec![Series::from_data(vec!["127.0.0.1"])],
        expect: Series::from_data(vec![Option::<u32>::Some(2130706433_u32)]),
        error: "",
    };
    let invalid = ScalarFunctionTest {
        name: "invalid input",
        columns: vec![Series::from_data(vec![Some("invalid")])],
        expect: Series::from_data(vec![Option::<u32>::None]),
        error: "",
    };
    let null = ScalarFunctionTest {
        name: "null input",
        columns: vec![Series::from_data(vec![Option::<Vec<u8>>::None])],
        expect: Series::from_data(vec![Option::<u32>::None]),
        error: "",
    };

    let cases = vec![valid, invalid, null];
    test_scalar_functions("try_inet_aton", &cases)
}

/// `inet_aton`: valid and null inputs succeed, while unparsable strings
/// (including the empty string) produce a parse error.
#[test]
fn test_inet_aton_function() -> Result<()> {
    let valid = ScalarFunctionTest {
        name: "valid input",
        columns: vec![Series::from_data([Some("127.0.0.1")])],
        expect: Series::from_data(vec![Option::<u32>::Some(2130706433_u32)]),
        error: "",
    };
    let null = ScalarFunctionTest {
        name: "null input",
        columns: vec![Series::from_data([Option::<Vec<u8>>::None])],
        expect: Series::from_data([Option::<u32>::None]),
        error: "",
    };
    // For the two failing cases `expect` looks like a placeholder; the harness
    // presumably only checks `error` when it is non-empty.
    let invalid = ScalarFunctionTest {
        name: "invalid input",
        columns: vec![Series::from_data([Some("1.1.1.1"), Some("batman")])],
        expect: Series::from_data(vec![Option::<u32>::None]),
        error: "Failed to parse 'batman' into a IPV4 address, invalid IP address syntax",
    };
    let empty = ScalarFunctionTest {
        name: "empty string",
        columns: vec![Series::from_data([Some("1.1.1.1"), Some("")])],
        expect: Series::from_data(vec![Option::<u32>::None]),
        error: "Failed to parse '' into a IPV4 address, invalid IP address syntax",
    };

    let cases = vec![valid, null, invalid, empty];
    test_scalar_functions("inet_aton", &cases)
}
Loading

1 comment on commit f1b947a

@vercel
Copy link

@vercel vercel bot commented on f1b947a May 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-databend.vercel.app
databend.rs
databend.vercel.app
databend-git-main-databend.vercel.app

Please sign in to comment.