Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EXTRACT Function To OpenSearch SQL Plugin #1421

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,15 @@ public static FunctionExpression day_of_week(
return compile(functionProperties, BuiltinFunctionName.DAY_OF_WEEK, expressions);
}

/**
 * Creates the function expression for {@code EXTRACT}, compiled with the given properties.
 *
 * @param functionProperties properties passed through to {@code compile}
 * @param expressions arguments of the call
 * @return the compiled {@code EXTRACT} function expression
 */
public static FunctionExpression extract(FunctionProperties functionProperties,
    Expression... expressions) {
  final BuiltinFunctionName function = BuiltinFunctionName.EXTRACT;
  return compile(functionProperties, function, expressions);
}

/**
 * Creates the function expression for {@code EXTRACT} using {@code FunctionProperties.None}.
 *
 * @param expressions arguments of the call
 * @return the compiled {@code EXTRACT} function expression
 */
public static FunctionExpression extract(Expression... expressions) {
  final FunctionProperties noProperties = FunctionProperties.None;
  return extract(noProperties, expressions);
}

/**
 * Creates the function expression for {@code FROM_DAYS} using {@code FunctionProperties.None}.
 *
 * @param expressions arguments of the call
 * @return the compiled {@code FROM_DAYS} function expression
 */
public static FunctionExpression from_days(Expression... expressions) {
  final BuiltinFunctionName function = BuiltinFunctionName.FROM_DAYS;
  return compile(FunctionProperties.None, function, expressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import static org.opensearch.sql.utils.DateTimeUtils.extractDate;
import static org.opensearch.sql.utils.DateTimeUtils.extractDateTime;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Table;
import java.math.BigDecimal;
Expand All @@ -56,6 +57,7 @@
import java.time.temporal.TemporalAmount;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
Expand Down Expand Up @@ -105,6 +107,32 @@ public class DateTimeFunction {
// Default mode (integer value 0) used by the week/week_of_year function when the caller
// provides no mode argument.
private static final ExprIntegerValue DEFAULT_WEEK_OF_YEAR_MODE = new ExprIntegerValue(0);


// Lookup table for the extract function: maps the upper-case name of a 'part' to the
// DateTimeFormatter pattern used to render the digits of the result.
private static final Map<String, String> extract_formats =
    new ImmutableMap.Builder<String, String>()
        // Parts made of a single unit.
        .put("MICROSECOND", "SSSSSS")
        .put("SECOND", "ss")
        .put("MINUTE", "mm")
        .put("HOUR", "HH")
        .put("DAY", "dd")
        .put("WEEK", "w")
        .put("MONTH", "MM")
        .put("YEAR", "yyyy")
        // Composite parts: units are concatenated from the largest to the smallest.
        .put("SECOND_MICROSECOND", "ssSSSSSS")
        .put("MINUTE_MICROSECOND", "mmssSSSSSS")
        .put("MINUTE_SECOND", "mmss")
        .put("HOUR_MICROSECOND", "HHmmssSSSSSS")
        .put("HOUR_SECOND", "HHmmss")
        .put("HOUR_MINUTE", "HHmm")
        .put("DAY_MICROSECOND", "ddHHmmssSSSSSS")
        .put("DAY_SECOND", "ddHHmmss")
        .put("DAY_MINUTE", "ddHHmm")
        .put("DAY_HOUR", "ddHH")
        .put("YEAR_MONTH", "yyyyMM")
        .put("QUARTER", "Q")
        .build();

// Map used to determine format output for the get_format function
private static final Table<String, String, String> formats =
ImmutableTable.<String, String, String>builder()
Expand Down Expand Up @@ -157,6 +185,7 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(dayOfWeek(BuiltinFunctionName.DAY_OF_WEEK.getName()));
repository.register(dayOfYear(BuiltinFunctionName.DAYOFYEAR));
repository.register(dayOfYear(BuiltinFunctionName.DAY_OF_YEAR));
repository.register(extract());
repository.register(from_days());
repository.register(from_unixtime());
repository.register(get_format());
Expand Down Expand Up @@ -538,6 +567,17 @@ private DefaultFunctionResolver dayOfYear(BuiltinFunctionName dayOfYear) {
);
}

/**
 * Defines the resolver for the EXTRACT function.
 *
 * <p>Five overloads are registered, all returning LONG and all taking a STRING 'part' as the
 * first argument. The second argument may be TIME, DATE, DATETIME, TIMESTAMP or STRING.
 * The TIME overload is the only one registered through implWithProperties, because
 * exprExtractForTime needs the FunctionProperties; the others share exprExtract.
 */
private DefaultFunctionResolver extract() {
return define(BuiltinFunctionName.EXTRACT.getName(),
implWithProperties(nullMissingHandlingWithProperties(DateTimeFunction::exprExtractForTime),
LONG, STRING, TIME),
impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, DATE),
impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, DATETIME),
impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, TIMESTAMP),
impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, STRING)
);
}

/**
* FROM_DAYS(LONG). return the date value given the day number N.
*/
Expand Down Expand Up @@ -1260,6 +1300,48 @@ private ExprValue exprDayOfYear(ExprValue date) {
return new ExprIntegerValue(date.dateValue().getDayOfYear());
}

/**
 * Obtains a formatted long value for a specified part and datetime for the 'extract' function.
 *
 * <p>The part name is upper-cased and looked up in {@code extract_formats}; the datetime is
 * rendered with the associated pattern and the resulting digits are parsed as a long.
 *
 * @param part is an ExprValue which comes from a defined list of accepted values.
 * @param datetime the date to be formatted as an ExprValue.
 * @return is a LONG formatted according to the input arguments.
 * @throws IllegalArgumentException if {@code part} does not name a supported part.
 */
public ExprLongValue formatExtractFunction(ExprValue part, ExprValue datetime) {
  // Locale.ROOT keeps the lookup key independent of the JVM default locale; with a Turkish
  // default, for example, "minute" would otherwise be upper-cased with a dotted capital I.
  String partName = part.stringValue().toUpperCase(Locale.ROOT);
  String pattern = extract_formats.get(partName);
  if (pattern == null) {
    // Report the offending part instead of letting DateTimeFormatter.ofPattern(null)
    // fail with a bare NullPointerException.
    throw new IllegalArgumentException(
        "Unsupported part for extract function: " + part.stringValue());
  }

  LocalDateTime arg = datetime.datetimeValue();
  String text = arg.format(DateTimeFormatter.ofPattern(pattern, Locale.ENGLISH));

  return new ExprLongValue(Long.parseLong(text));
}

/**
 * Extract implementation for arguments that are not of TIME type.
 * Delegates to {@code formatExtractFunction}.
 *
 * @param part Literal selecting which part(s) of the value make up the result.
 * @param datetime The date/datetime value to extract from.
 * @return The extracted part as a LONG value.
 */
private ExprValue exprExtract(ExprValue part, ExprValue datetime) {
  final ExprValue extracted = formatExtractFunction(part, datetime);
  return extracted;
}

/**
 * Extract implementation for TIME arguments. The time is first converted with
 * {@code extractDateTime} and wrapped in an {@code ExprDatetimeValue}, then handed to
 * {@code formatExtractFunction}.
 *
 * @param functionProperties Properties passed to {@code extractDateTime} for the conversion.
 * @param part Literal selecting which part(s) of the value make up the result.
 * @param time The time value to extract from.
 * @return The extracted part as a LONG value.
 */
private ExprValue exprExtractForTime(FunctionProperties functionProperties,
    ExprValue part,
    ExprValue time) {
  final ExprValue datetime = new ExprDatetimeValue(extractDateTime(time, functionProperties));
  return formatExtractFunction(part, datetime);
}

/**
* From_days implementation for ExprValue.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public enum BuiltinFunctionName {
DAYOFYEAR(FunctionName.of("dayofyear")),
DAY_OF_WEEK(FunctionName.of("day_of_week")),
DAY_OF_YEAR(FunctionName.of("day_of_year")),
EXTRACT(FunctionName.of("extract")),
FROM_DAYS(FunctionName.of("from_days")),
FROM_UNIXTIME(FunctionName.of("from_unixtime")),
GET_FORMAT(FunctionName.of("get_format")),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.expression.datetime;

import static java.time.temporal.ChronoField.ALIGNED_WEEK_OF_YEAR;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.opensearch.sql.data.type.ExprCoreType.LONG;

import java.time.LocalDate;
import java.time.temporal.WeekFields;
import java.util.Locale;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.opensearch.sql.data.model.ExprDateValue;
import org.opensearch.sql.data.model.ExprDatetimeValue;
import org.opensearch.sql.data.model.ExprTimeValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.ExpressionTestBase;
import org.opensearch.sql.expression.FunctionExpression;

/**
 * Unit tests for the {@code extract} function built through {@code DSL.extract}.
 *
 * <p>Every part is evaluated against a fixed DATETIME, DATE or TIME literal and the LONG result,
 * the expression type and the expression's string form are verified. Date parts extracted from
 * a TIME value are checked against the date of the query start clock.
 */
class ExtractTest extends ExpressionTestBase {

  private final String datetimeInput = "2023-02-11 10:11:12.123";

  private final String timeInput = "10:11:12.123";

  private final String dateInput = "2023-02-11";

  /** Parts spanning both the date and the time portion, with results for datetimeInput. */
  private static Stream<Arguments> getDatetimeResultsForExtractFunction() {
    return Stream.of(
        Arguments.of("DAY_MICROSECOND", 11101112123000L),
        Arguments.of("DAY_SECOND", 11101112),
        Arguments.of("DAY_MINUTE", 111011),
        Arguments.of("DAY_HOUR", 1110)
    );
  }

  /** Parts that only involve the time portion, with results for timeInput. */
  private static Stream<Arguments> getTimeResultsForExtractFunction() {
    return Stream.of(
        Arguments.of("MICROSECOND", 123000),
        Arguments.of("SECOND", 12),
        Arguments.of("MINUTE", 11),
        Arguments.of("HOUR", 10),
        Arguments.of("SECOND_MICROSECOND", 12123000),
        Arguments.of("MINUTE_MICROSECOND", 1112123000),
        Arguments.of("MINUTE_SECOND", 1112),
        Arguments.of("HOUR_MICROSECOND", 101112123000L),
        Arguments.of("HOUR_SECOND", 101112),
        Arguments.of("HOUR_MINUTE", 1011)
    );
  }

  /** Parts that only involve the date portion, with results for dateInput. */
  private static Stream<Arguments> getDateResultsForExtractFunction() {
    return Stream.of(
        Arguments.of("DAY", 11),
        Arguments.of("WEEK", 6),
        Arguments.of("MONTH", 2),
        Arguments.of("QUARTER", 1),
        Arguments.of("YEAR", 2023),
        Arguments.of("YEAR_MONTH", 202302)
    );
  }

  @ParameterizedTest(name = "{0}")
  @MethodSource({
      "getDatetimeResultsForExtractFunction",
      "getTimeResultsForExtractFunction",
      "getDateResultsForExtractFunction"})
  public void testExtractWithDatetime(String part, long expected) {
    FunctionExpression datetimeExpression = DSL.extract(
        DSL.literal(part),
        DSL.literal(new ExprDatetimeValue(datetimeInput)));

    assertEquals(LONG, datetimeExpression.type());
    assertEquals(expected, eval(datetimeExpression).longValue());
    assertEquals(
        String.format("extract(\"%s\", DATETIME '2023-02-11 10:11:12.123')", part),
        datetimeExpression.toString());
  }

  /** Evaluates extract(part, TIME time) with the test's function properties. */
  private void datePartWithTimeArgQuery(String part, String time, long expected) {
    ExprTimeValue timeValue = new ExprTimeValue(time);
    FunctionExpression datetimeExpression = DSL.extract(
        functionProperties,
        DSL.literal(part),
        DSL.literal(timeValue));

    assertEquals(LONG, datetimeExpression.type());
    assertEquals(expected,
        eval(datetimeExpression).longValue());
  }


  @Test
  public void testExtractDatePartWithTimeType() {
    // Resolve the reference date once so that all expectations describe the same day.
    LocalDate today = LocalDate.now(functionProperties.getQueryStartClock());

    datePartWithTimeArgQuery(
        "DAY",
        timeInput,
        today.getDayOfMonth());

    // WEEK is produced by formatting with the "w" pattern under Locale.ENGLISH, which is the
    // locale's week-of-week-based-year. ALIGNED_WEEK_OF_YEAR counts 7-day blocks starting on
    // January 1st and differs from it on some days, so the expectation has to use the same
    // week definition as the formatter to be independent of the day the test runs.
    datePartWithTimeArgQuery(
        "WEEK",
        timeInput,
        today.get(WeekFields.of(Locale.ENGLISH).weekOfWeekBasedYear()));

    datePartWithTimeArgQuery(
        "MONTH",
        timeInput,
        today.getMonthValue());

    datePartWithTimeArgQuery(
        "YEAR",
        timeInput,
        today.getYear());
  }

  @ParameterizedTest(name = "{0}")
  @MethodSource("getDateResultsForExtractFunction")
  public void testExtractWithDate(String part, long expected) {
    FunctionExpression datetimeExpression = DSL.extract(
        DSL.literal(part),
        DSL.literal(new ExprDateValue(dateInput)));

    assertEquals(LONG, datetimeExpression.type());
    assertEquals(expected, eval(datetimeExpression).longValue());
    assertEquals(
        String.format("extract(\"%s\", DATE '2023-02-11')", part),
        datetimeExpression.toString());
  }

  @ParameterizedTest(name = "{0}")
  @MethodSource("getTimeResultsForExtractFunction")
  public void testExtractWithTime(String part, long expected) {
    FunctionExpression datetimeExpression = DSL.extract(
        functionProperties,
        DSL.literal(part),
        DSL.literal(new ExprTimeValue(timeInput)));

    assertEquals(LONG, datetimeExpression.type());
    assertEquals(expected, eval(datetimeExpression).longValue());
    assertEquals(
        String.format("extract(\"%s\", TIME '10:11:12.123')", part),
        datetimeExpression.toString());
  }

  /** Evaluates an expression that needs no value environment. */
  private ExprValue eval(Expression expression) {
    return expression.valueOf();
  }
}
69 changes: 69 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1752,6 +1752,75 @@ Example::
+-------------------------------------------------+


EXTRACT
-------

Description
>>>>>>>>>>>

Usage: extract(part FROM date) returns a LONG with digits in order according to the given 'part' argument.
The specific format of the returned long is determined by the table below.

Argument type: PART
PART must be one of the following tokens in the table below.

The format specifiers found in this table are the same as those found in the `DATE_FORMAT`_ function.
.. list-table:: The following table describes the mapping of a 'part' to a particular format.
:widths: 20 80
:header-rows: 1

* - Part
- Format
* - MICROSECOND
- %f
* - SECOND
- %s
* - MINUTE
- %i
* - HOUR
- %H
* - DAY
- %d
* - WEEK
- %X
* - MONTH
- %m
* - YEAR
- %Y
* - SECOND_MICROSECOND
- %s%f
* - MINUTE_MICROSECOND
- %i%s%f
* - MINUTE_SECOND
- %i%s
* - HOUR_MICROSECOND
- %H%i%s%f
* - HOUR_SECOND
- %H%i%s
* - HOUR_MINUTE
- %H%i
* - DAY_MICROSECOND
- %d%H%i%s%f
* - DAY_SECOND
- %d%H%i%s
* - DAY_MINUTE
- %d%H%i
* - DAY_HOUR
- %d%H
* - YEAR_MONTH
- %Y%m

Return type: LONG

Example::

os> SELECT extract(YEAR_MONTH FROM "2023-02-07 10:11:12");
fetched rows / total rows = 1/1
+--------------------------------------------------+
| extract(YEAR_MONTH FROM "2023-02-07 10:11:12") |
|--------------------------------------------------|
| 202302 |
+--------------------------------------------------+

FROM_DAYS
---------
Expand Down
Loading