From a3c7dc43dfc87ebf74cee6af506465c4989055eb Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 9 Aug 2023 16:02:12 +0530 Subject: [PATCH 1/9] MB-58033: Support for custom datetime layouts --- analysis/datetime/flexible/flexible.go | 71 +++++++++++++++++++++++++- search/query/date_range.go | 10 ++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/analysis/datetime/flexible/flexible.go b/analysis/datetime/flexible/flexible.go index cb5f234d5..a86e4c22b 100644 --- a/analysis/datetime/flexible/flexible.go +++ b/analysis/datetime/flexible/flexible.go @@ -24,6 +24,39 @@ import ( const Name = "flexiblego" +var formatDelimiter byte = '%' + +var formatSpecifierToLayout = map[byte]string{ + formatDelimiter: "%", + 'd': "2", + 'D': "02", + 'm': "1", + 'M': "01", + 'y': "06", + 'Y': "2006", + 'b': "Jan", + 'B': "January", + 'a': "Mon", + 'A': "Monday", + 'h': "3", + 'H': "03", + 'O': "15", + 'i': "4", + 'I': "04", + 's': "5", + 'S': "05", + 'p': "pm", + 'P': "PM", + 'z': "-0700", + 'Z': "-070000", + 'x': "-07", + 'v': "-07:00", + 'V': "-07:00:00", + 'N': ".000000000", + 'F': ".000000", + 'U': ".000", +} + type DateTimeParser struct { layouts []string } @@ -44,6 +77,33 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) return time.Time{}, "", analysis.ErrInvalidDateTime } +func parseFormatString(formatString string) (string, error) { + dateTimeLayout := "" + usingNewFormat := false + for idx := 0; idx < len(formatString); { + if formatString[idx] == formatDelimiter { + if idx+1 < len(formatString) { + if layout, ok := formatSpecifierToLayout[formatString[idx+1]]; ok { + dateTimeLayout += layout + idx += 2 + usingNewFormat = true + } else { + return "", fmt.Errorf("invalid format string, unknown format specifier: " + string(formatString[idx+1])) + } + } else { + return "", fmt.Errorf("invalid format string, expected character after " + string(formatDelimiter)) + } + } else { + dateTimeLayout += string(formatString[idx]) + 
idx++ + } + } + if !usingNewFormat { + return "", nil + } + return dateTimeLayout, nil +} + func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { layouts, ok := config["layouts"].([]interface{}) if !ok { @@ -53,7 +113,16 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca for _, layout := range layouts { layoutStr, ok := layout.(string) if ok { - layoutStrs = append(layoutStrs, layoutStr) + layout, err := parseFormatString(layoutStr) + if err != nil { + return nil, err + } + if layout == "" { + // if layout is empty, and there is no error then it means that the layoutStr + // is not using the new format and is in the old format + layout = layoutStr + } + layoutStrs = append(layoutStrs, layout) } } return New(layoutStrs), nil diff --git a/search/query/date_range.go b/search/query/date_range.go index 47012fb18..219ff0c58 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -152,6 +152,11 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { + if q.Start.Year() == 0 { + // year is zero, so this time.Time has unspecified date + // but is Not Zero so must have time only + q.Start.Time = q.Start.Time.AddDate(1700, 0, 0) + } if !isDatetimeCompatible(q.Start) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) @@ -160,6 +165,11 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { + if q.End.Year() == 0 { + // year is zero, so this time.Time has unspecified date + // but is Not Zero so must have time only + q.End.Time = q.End.Time.AddDate(1700, 0, 0) + } if !isDatetimeCompatible(q.End) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) From a22249d9a63d656caebd3fe44131554d5968967b Mon Sep 17 00:00:00 2001 
From: CascadingRadium Date: Mon, 4 Sep 2023 04:05:58 +0530 Subject: [PATCH 2/9] refactor code --- analysis/datetime/flexible/flexible.go | 71 +---------- analysis/datetime/percent/percent.go | 160 +++++++++++++++++++++++++ search/query/date_range.go | 10 -- 3 files changed, 161 insertions(+), 80 deletions(-) create mode 100644 analysis/datetime/percent/percent.go diff --git a/analysis/datetime/flexible/flexible.go b/analysis/datetime/flexible/flexible.go index a86e4c22b..cb5f234d5 100644 --- a/analysis/datetime/flexible/flexible.go +++ b/analysis/datetime/flexible/flexible.go @@ -24,39 +24,6 @@ import ( const Name = "flexiblego" -var formatDelimiter byte = '%' - -var formatSpecifierToLayout = map[byte]string{ - formatDelimiter: "%", - 'd': "2", - 'D': "02", - 'm': "1", - 'M': "01", - 'y': "06", - 'Y': "2006", - 'b': "Jan", - 'B': "January", - 'a': "Mon", - 'A': "Monday", - 'h': "3", - 'H': "03", - 'O': "15", - 'i': "4", - 'I': "04", - 's': "5", - 'S': "05", - 'p': "pm", - 'P': "PM", - 'z': "-0700", - 'Z': "-070000", - 'x': "-07", - 'v': "-07:00", - 'V': "-07:00:00", - 'N': ".000000000", - 'F': ".000000", - 'U': ".000", -} - type DateTimeParser struct { layouts []string } @@ -77,33 +44,6 @@ func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) return time.Time{}, "", analysis.ErrInvalidDateTime } -func parseFormatString(formatString string) (string, error) { - dateTimeLayout := "" - usingNewFormat := false - for idx := 0; idx < len(formatString); { - if formatString[idx] == formatDelimiter { - if idx+1 < len(formatString) { - if layout, ok := formatSpecifierToLayout[formatString[idx+1]]; ok { - dateTimeLayout += layout - idx += 2 - usingNewFormat = true - } else { - return "", fmt.Errorf("invalid format string, unknown format specifier: " + string(formatString[idx+1])) - } - } else { - return "", fmt.Errorf("invalid format string, expected character after " + string(formatDelimiter)) - } - } else { - dateTimeLayout += 
string(formatString[idx]) - idx++ - } - } - if !usingNewFormat { - return "", nil - } - return dateTimeLayout, nil -} - func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { layouts, ok := config["layouts"].([]interface{}) if !ok { @@ -113,16 +53,7 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca for _, layout := range layouts { layoutStr, ok := layout.(string) if ok { - layout, err := parseFormatString(layoutStr) - if err != nil { - return nil, err - } - if layout == "" { - // if layout is empty, and there is no error then it means that the layoutStr - // is not using the new format and is in the old format - layout = layoutStr - } - layoutStrs = append(layoutStrs, layout) + layoutStrs = append(layoutStrs, layoutStr) } } return New(layoutStrs), nil diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go new file mode 100644 index 000000000..872dcc656 --- /dev/null +++ b/analysis/datetime/percent/percent.go @@ -0,0 +1,160 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package percent + +import ( + "fmt" + "strings" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "percentgo" + +var formatDelimiter byte = '%' + +var timezoneSpecifier byte = 'Z' + +var formatSpecifierToLayout = map[byte]string{ + formatDelimiter: string(formatDelimiter), + 'd': "2", + 'D': "02", + 'm': "1", + 'M': "01", + 'y': "06", + 'Y': "2006", + 'b': "Jan", + 'B': "January", + 'a': "Mon", + 'A': "Monday", + 'h': "3", + 'H': "03", + 'O': "15", + 'i': "4", + 'I': "04", + 's': "5", + 'S': "05", + 'p': "pm", + 'P': "PM", + 'N': ".999999999", +} + +var timezoneOptions = map[string]string{ + "Z:M": "Z07:00", + "Z:S": "Z07:00:00", + "ZH": "Z07", + "ZM": "Z0700", + "ZS": "Z070000", +} + +type DateTimeParser struct { + layouts []string +} + +func New(layouts []string) *DateTimeParser { + return &DateTimeParser{ + layouts: layouts, + } +} + +func checkTZOptions(formatString string, idx int) (string, error) { + key := "Z" + if idx+1 >= len(formatString) { + return "", fmt.Errorf("invalid format string, expected character after " + string(timezoneSpecifier)) + } + if formatString[idx+1] == ':' { + // check if there is a character after the colon + if idx+2 >= len(formatString) { + return "", fmt.Errorf("invalid format string, expected character after colon") + } + key += ":" + idx++ + } + key += string(formatString[idx+1]) + if layout, ok := timezoneOptions[key]; ok { + return layout, nil + } + return "", fmt.Errorf("invalid format string, unknown timezone specifier: " + key) +} + +func parseFormatString(formatString string) (string, error) { + var dateTimeLayout strings.Builder + // iterate over the format string and replace the format specifiers with + // the corresponding golang constants + for idx := 0; idx < len(formatString); { + // check if the character is a format specifier + if formatString[idx] == formatDelimiter { + // check if there is a character after the format specifier + if idx+1 
>= len(formatString) { + return "", fmt.Errorf("invalid format string, expected character after " + string(formatDelimiter)) + } + formatSpecifier := formatString[idx+1] + if layout, ok := formatSpecifierToLayout[formatSpecifier]; ok { + dateTimeLayout.WriteString(layout) + idx += 2 + } else if formatSpecifier == timezoneSpecifier { + // did not find a valid specifier + // check if it is for timezone + tzLayout, err := checkTZOptions(formatString, idx+1) + if err != nil { + return "", err + } + dateTimeLayout.WriteString(tzLayout) + } else { + return "", fmt.Errorf("invalid format string, unknown format specifier: " + string(formatSpecifier)) + } + continue + } + // copy the character as is + dateTimeLayout.WriteByte(formatString[idx]) + idx++ + } + return dateTimeLayout.String(), nil +} + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) { + for _, layout := range p.layouts { + rv, err := time.Parse(layout, input) + if err == nil { + return rv, nil + } + } + return time.Time{}, analysis.ErrInvalidDateTime +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + layouts, ok := config["layouts"].([]interface{}) + if !ok { + return nil, fmt.Errorf("must specify layouts") + } + var layoutStrs []string + for _, layout := range layouts { + layoutStr, ok := layout.(string) + if ok { + layout, err := parseFormatString(layoutStr) + if err != nil { + return nil, err + } + layoutStrs = append(layoutStrs, layout) + } + } + return New(layoutStrs), nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/search/query/date_range.go b/search/query/date_range.go index 219ff0c58..47012fb18 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -152,11 +152,6 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - if q.Start.Year() == 0 { 
- // year is zero, so this time.Time has unspecified date - // but is Not Zero so must have time only - q.Start.Time = q.Start.Time.AddDate(1700, 0, 0) - } if !isDatetimeCompatible(q.Start) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) @@ -165,11 +160,6 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - if q.End.Year() == 0 { - // year is zero, so this time.Time has unspecified date - // but is Not Zero so must have time only - q.End.Time = q.End.Time.AddDate(1700, 0, 0) - } if !isDatetimeCompatible(q.End) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) From e2b4c888f0f3a242a0ceea0fecc96724494fdd69 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 4 Sep 2023 04:15:19 +0530 Subject: [PATCH 3/9] merge --- analysis/datetime/percent/percent.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index 872dcc656..41198a866 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -126,14 +126,14 @@ func parseFormatString(formatString string) (string, error) { return dateTimeLayout.String(), nil } -func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) { +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { for _, layout := range p.layouts { rv, err := time.Parse(layout, input) if err == nil { - return rv, nil + return rv, layout, nil } } - return time.Time{}, analysis.ErrInvalidDateTime + return time.Time{}, "", analysis.ErrInvalidDateTime } func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { From bdad5a19505cde57405853371fb6ceeeb33e2ea7 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 4 Sep 2023 06:30:51 +0530 Subject: [PATCH 
4/9] unit tests --- analysis/datetime/percent/percent.go | 18 +++--- analysis/datetime/percent/percent_test.go | 75 +++++++++++++++++++++++ 2 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 analysis/datetime/percent/percent_test.go diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index 41198a866..1310e0503 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -48,8 +48,8 @@ var formatSpecifierToLayout = map[byte]string{ 'I': "04", 's': "5", 'S': "05", - 'p': "pm", - 'P': "PM", + 'p': "PM", + 'P': "pm", 'N': ".999999999", } @@ -71,24 +71,24 @@ func New(layouts []string) *DateTimeParser { } } -func checkTZOptions(formatString string, idx int) (string, error) { +func checkTZOptions(formatString string, idx int) (string, int, error) { key := "Z" if idx+1 >= len(formatString) { - return "", fmt.Errorf("invalid format string, expected character after " + string(timezoneSpecifier)) + return "", 0, fmt.Errorf("invalid format string, expected character after " + string(timezoneSpecifier)) } if formatString[idx+1] == ':' { // check if there is a character after the colon if idx+2 >= len(formatString) { - return "", fmt.Errorf("invalid format string, expected character after colon") + return "", 0, fmt.Errorf("invalid format string, expected character after colon") } key += ":" idx++ } key += string(formatString[idx+1]) if layout, ok := timezoneOptions[key]; ok { - return layout, nil + return layout, idx + 2, nil } - return "", fmt.Errorf("invalid format string, unknown timezone specifier: " + key) + return "", 0, fmt.Errorf("invalid format string, unknown timezone specifier: " + key) } func parseFormatString(formatString string) (string, error) { @@ -109,7 +109,9 @@ func parseFormatString(formatString string) (string, error) { } else if formatSpecifier == timezoneSpecifier { // did not find a valid specifier // check if it is for timezone - tzLayout, err := 
checkTZOptions(formatString, idx+1) + var tzLayout string + var err error + tzLayout, idx, err = checkTZOptions(formatString, idx+1) if err != nil { return "", err } diff --git a/analysis/datetime/percent/percent_test.go b/analysis/datetime/percent/percent_test.go new file mode 100644 index 000000000..55e85f6b1 --- /dev/null +++ b/analysis/datetime/percent/percent_test.go @@ -0,0 +1,75 @@ +package percent + +import ( + "fmt" + "testing" +) + +func TestConversionFromPercentStyle(t *testing.T) { + tests := []struct { + input string + output string + err error + }{ + { + input: "%Y-%m-%d", + output: "2006-1-2", + err: nil, + }, + { + input: "%Y/%M%%%%%DT%H%i:%S", + output: "2006/01%%02T034:05", + err: nil, + }, + { + input: "%Y-%M-%DT%O:%I:%S%ZM", + output: "2006-01-02T15:04:05Z0700", + err: nil, + }, + { + input: "%B %D, %Y %H:%I %P %Z:M", + output: "January 02, 2006 03:04 pm Z07:00", + err: nil, + }, + { + input: "Hour %O Minute %iseconds %S%N Timezone:%Z:S, Weekday %a; Day %D Month %b, Year %y", + output: "Hour 15 Minute 4seconds 05.999999999 Timezone:Z07:00:00, Weekday Mon; Day 02 Month Jan, Year 06", + err: nil, + }, + { + input: "%Y-%M-%D%T%O:%I:%S%ZM", + output: "", + err: fmt.Errorf("invalid format string, unknown format specifier: T"), + }, + { + input: "%Y-%M-%DT%O:%I%S%ZM%", + output: "", + err: fmt.Errorf("invalid format string, invalid format string, expected character after %%"), + }, + { + input: "%Y-%M-%DT%O:%I:%S%Z", + output: "", + err: fmt.Errorf("invalid format string, expected character after Z"), + }, + { + input: "%Y-%M-%DT%O:%I:%S%Z:", + output: "", + err: fmt.Errorf("invalid format string, expected character after colon"), + }, + { + input: "%O:%I:%S%Z%H:%M:%S", + output: "", + err: fmt.Errorf("invalid format string, unknown timezone specifier: Z%%"), + }, + } + for _, test := range tests { + out, err := parseFormatString(test.input) + if err != nil && test.err == nil || err == nil && test.err != nil { + t.Fatalf("expected error %v, got error 
%v", test.err, err) + } + if out != test.output { + t.Fatalf("expected output %v, got %v", test.output, out) + } + } + +} From d2546a827167fc3a4c0f7bf16aa8b00bdc1d9343 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Thu, 7 Sep 2023 17:17:33 +0530 Subject: [PATCH 5/9] licensing --- analysis/datetime/percent/percent.go | 4 ++-- analysis/datetime/percent/percent_test.go | 14 ++++++++++++++ analysis/datetime/sanitized/sanitized.go | 2 +- analysis/datetime/sanitized/sanitized_test.go | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index 1310e0503..d44091117 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2023 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ import ( "github.com/blevesearch/bleve/v2/registry" ) -const Name = "percentgo" +const Name = "percentstyle" var formatDelimiter byte = '%' diff --git a/analysis/datetime/percent/percent_test.go b/analysis/datetime/percent/percent_test.go index 55e85f6b1..560083ff2 100644 --- a/analysis/datetime/percent/percent_test.go +++ b/analysis/datetime/percent/percent_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package percent import ( diff --git a/analysis/datetime/sanitized/sanitized.go b/analysis/datetime/sanitized/sanitized.go index 33d271e6e..09eb94d1d 100644 --- a/analysis/datetime/sanitized/sanitized.go +++ b/analysis/datetime/sanitized/sanitized.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2023 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/analysis/datetime/sanitized/sanitized_test.go b/analysis/datetime/sanitized/sanitized_test.go index d680b248b..0bf3b15d0 100644 --- a/analysis/datetime/sanitized/sanitized_test.go +++ b/analysis/datetime/sanitized/sanitized_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package sanitized import ( From d8d8b116189fa143824abf8b45531bf5117c1db9 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 13 Sep 2023 16:45:53 +0530 Subject: [PATCH 6/9] add javastyle times --- analysis/datetime/javatime/javatime.go | 240 ++++++++++++++++++++ analysis/datetime/javatime/javatime_test.go | 88 +++++++ analysis/datetime/percent/percent.go | 107 ++++----- analysis/datetime/percent/percent_test.go | 69 ++++-- config/config.go | 2 + 5 files changed, 433 insertions(+), 73 deletions(-) create mode 100644 analysis/datetime/javatime/javatime.go create mode 100644 analysis/datetime/javatime/javatime_test.go diff --git a/analysis/datetime/javatime/javatime.go b/analysis/datetime/javatime/javatime.go new file mode 100644 index 000000000..0b525fa16 --- /dev/null +++ b/analysis/datetime/javatime/javatime.go @@ -0,0 +1,240 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package javatime + +import ( + "fmt" + "strings" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "javastyle" + +var textLiteralDelimiter byte = '\'' // single quote + +// java style date strings are represented in +// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html +// +// Some format specifiers are not specified in go time package, such as: +// - 'V' for timezone name, like 'Europe/Berlin' or 'America/New_York'. 
+// - 'Q' for quarter of year, like Q3 or 3rd Quarter. +// - 'zzzz' for full name of timezone like "Japan Standard Time" or "Eastern Standard Time". +// - 'O' for localized zone-offset, like GMT+8 or GMT+08:00. +// - '[]' for optional section of the format. +// - 'G' for era, like AD or BC. +// - 'W' for week of month. +// - 'D' for day of year. +// So date strings containing these elements cannot be parsed. +var timeElementToLayout = map[byte]map[int]string{ + 'M': { + 4: "January", // MMMM = full month name + 3: "Jan", // MMM = short month name + 2: "01", // MM = month of year (2 digits) (01-12) + 1: "1", // M = month of year (1 digit) (1-12) + }, + 'd': { + 2: "02", // dd = day of month (2 digits) (01-31) + 1: "2", // d = day of month (1 digit) (1-31) + }, + 'a': { + 2: "pm", // aa = pm/am (lowercase) + 1: "PM", // a = PM/AM (uppercase) + }, + 'H': { + 2: "15", // HH = hour (24 hour clock) (2 digits) + 1: "15", // H = hour (24 hour clock) (1 digit) + }, + 'm': { + 2: "04", // mm = minute (2 digits) + 1: "4", // m = minute (1 digit) + }, + 's': { + 2: "05", // ss = seconds (2 digits) + 1: "5", // s = seconds (1 digit) + }, + + // timezone offsets from UTC below + 'X': { + 5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss) + 4: "Z070000", // XXXX = timezone offset (+-hhmmss) + 3: "Z07:00", // XXX = timezone offset (+-hh:mm) + 2: "Z0700", // XX = timezone offset (+-hhmm) + 1: "Z07", // X = timezone offset (+-hh) + }, + 'x': { + 5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss) + 4: "-070000", // xxxx = timezone offset (+-hhmmss) + 3: "-07:00", // xxx = timezone offset (+-hh:mm) + 2: "-0700", // xx = timezone offset (+-hhmm) + 1: "-07", // x = timezone offset (+-hh) + }, +} + +type DateTimeParser struct { + layouts []string +} + +func New(layouts []string) *DateTimeParser { + return &DateTimeParser{ + layouts: layouts, + } +} + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + for _, layout := range p.layouts { + rv, err := time.Parse(layout, input) + if err == nil { + 
return rv, layout, nil + } + } + return time.Time{}, "", analysis.ErrInvalidDateTime +} + +func letterCounter(layout string, idx int) int { + count := 1 + for idx+count < len(layout) { + if layout[idx+count] == layout[idx] { + count++ + } else { + break + } + } + return count +} + +func invalidFormatError(character byte, count int) error { + return fmt.Errorf("invalid format string, unknown format specifier: " + strings.Repeat(string(character), count)) +} + +func parseJavaString(layout string) (string, error) { + var dateTimeLayout strings.Builder + + for idx := 0; idx < len(layout); { + // check if the character is a text literal delimiter (') + if layout[idx] == textLiteralDelimiter { + if idx+1 < len(layout) && layout[idx+1] == textLiteralDelimiter { + // if the next character is also a text literal delimiter, then + // copy the character as is + dateTimeLayout.WriteByte(textLiteralDelimiter) + idx += 2 + continue + } + // find the next text literal delimiter + for idx++; idx < len(layout); idx++ { + if layout[idx] == textLiteralDelimiter { + break + } + dateTimeLayout.WriteByte(layout[idx]) + } + // idx can either be equal to len(layout) if the text literal delimiter is not found + // after the first text literal delimiter or it will be equal to the index of the + // second text literal delimiter + if idx == len(layout) { + // text literal delimiter not found error + return "", fmt.Errorf("invalid format string, expected text literal delimiter: " + string(textLiteralDelimiter)) + } + // increment idx to skip the second text literal delimiter + idx++ + continue + } + // check if character is a letter in english alphabet - a-zA-Z which are reserved + // for format specifiers + if (layout[idx] >= 'a' && layout[idx] <= 'z') || (layout[idx] >= 'A' && layout[idx] <= 'Z') { + // find the number of times the character occurs consecutively + count := letterCounter(layout, idx) + character := layout[idx] + // first check the table + if layout, ok := 
timeElementToLayout[character][count]; ok { + dateTimeLayout.WriteString(layout) + } else { + switch character { + case 'y', 'u', 'Y': + // year + if count == 2 { + dateTimeLayout.WriteString("06") + } else { + format := fmt.Sprintf("%%0%ds", count) + dateTimeLayout.WriteString(fmt.Sprintf(format, "2006")) + } + case 'h', 'K': + // hour (1-12) + if count == 2 { + dateTimeLayout.WriteString("03") + } else if count == 1 { + dateTimeLayout.WriteString("3") + } else { + return "", invalidFormatError(character, count) + } + case 'E': + // day of week + if count == 4 { + dateTimeLayout.WriteString("Monday") + } else if count <= 3 { + dateTimeLayout.WriteString("Mon") + } else { + return "", invalidFormatError(character, count) + } + case 'S': + // fraction of second + if count > 9 { + return "", invalidFormatError(character, count) + } + dateTimeLayout.WriteString(strings.Repeat(string('0'), count)) + case 'z': + // timezone id + if count < 5 { + dateTimeLayout.WriteString("MST") + } else { + return "", invalidFormatError(character, count) + } + default: + return "", invalidFormatError(character, count) + } + } + idx += count + } else { + // copy the character as is + dateTimeLayout.WriteByte(layout[idx]) + idx++ + } + } + return dateTimeLayout.String(), nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + layouts, ok := config["layouts"].([]interface{}) + if !ok { + return nil, fmt.Errorf("must specify layouts") + } + var layoutStrs []string + for _, layout := range layouts { + layoutStr, ok := layout.(string) + if ok { + layout, err := parseJavaString(layoutStr) + if err != nil { + return nil, err + } + layoutStrs = append(layoutStrs, layout) + } + } + return New(layoutStrs), nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/analysis/datetime/javatime/javatime_test.go b/analysis/datetime/javatime/javatime_test.go new file mode 100644 
index 000000000..9195e40b0 --- /dev/null +++ b/analysis/datetime/javatime/javatime_test.go @@ -0,0 +1,88 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package javatime + +import ( + "fmt" + "testing" +) + +func TestConversionFromJavaStyle(t *testing.T) { + tests := []struct { + input string + output string + err error + }{ + { + input: "yyyy-MM-dd", + output: "2006-01-02", + err: nil, + }, + { + input: "uuu/M''''dd'T'HH:m:ss.SSS", + output: "2006/1''02T15:4:05.000", + err: nil, + }, + { + input: "YYYY-MM-dd'T'H:mm:ss zzz", + output: "2006-01-02T15:04:05 MST", + err: nil, + }, + { + input: "MMMM dd yyyy', 'HH:mm:ss.SSS", + output: "January 02 2006, 15:04:05.000", + }, + { + input: "h 'o'''' clock' a, XXX", + output: "3 o' clock PM, Z07:00", + err: nil, + }, + { + input: "YYYY-MM-dd'T'HH:mm:ss'Z'", + output: "2006-01-02T15:04:05Z", + err: nil, + }, + { + input: "E MMM d H:m:s z Y", + output: "Mon Jan 2 15:4:5 MST 2006", + err: nil, + }, + { + input: "E MMM d H:m:s z Y", + output: "Mon Jan 2 15:4:5 MST 2006", + err: nil, + }, + { + input: "E MMM DD H:m:s z Y", + output: "", + err: fmt.Errorf("invalid format string, unknown format specifier: DD"), + }, + { + input: "E MMM''''' H:m:s z Y", + output: "", + err: fmt.Errorf("invalid format string, expected text literal delimiter: '"), + }, + } + for _, test := range tests { + out, err := parseJavaString(test.input) + if err != nil && test.err == nil || err == nil && 
test.err != nil { + t.Fatalf("expected error %v, got error %v", test.err, err) + } + if out != test.output { + t.Fatalf("expected output %v, got %v", test.output, out) + } + } + +} diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index d44091117..39e62ce92 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -27,38 +27,47 @@ const Name = "percentstyle" var formatDelimiter byte = '%' -var timezoneSpecifier byte = 'Z' - var formatSpecifierToLayout = map[byte]string{ + // format specifiers as per strftime in the C standard library + // https://man7.org/linux/man-pages/man3/strftime.3.html + formatDelimiter: string(formatDelimiter), - 'd': "2", - 'D': "02", - 'm': "1", - 'M': "01", - 'y': "06", - 'Y': "2006", - 'b': "Jan", - 'B': "January", - 'a': "Mon", - 'A': "Monday", - 'h': "3", - 'H': "03", - 'O': "15", - 'i': "4", - 'I': "04", - 's': "5", - 'S': "05", - 'p': "PM", - 'P': "pm", - 'N': ".999999999", + 'a': "Mon", // %a = short weekday name + 'A': "Monday", // %A = full weekday name + 'd': "02", // %d = day of month (2 digits) (01-31) + 'e': "2", // %e = day of month (1 digit) (1-31) + 'b': "Jan", // %b = short month name + 'B': "January", // %B = full month name + 'm': "01", // %m = month of year (2 digits) (01-12) + 'y': "06", // %y = year without century + 'Y': "2006", // %Y = year with century + 'H': "15", // %H = hour (24 hour clock) (2 digits) + 'I': "03", // %I = hour (12 hour clock) (2 digits) + 'l': "3", // %l = hour (12 hour clock) (1 digit) + 'p': "PM", // %p = PM/AM + 'P': "pm", // %P = pm/am (lowercase) + 'M': "04", // %M = minute (2 digits) + 'S': "05", // %S = seconds (2 digits) + 'f': "999999", // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro + 'Z': "MST", // %Z = timezone name (GMT, JST, UTC etc) + // %z is present in timezone options + + // some additional options not in strftime to support additional options such as + // disallow 0 
padding in minute and seconds, nanosecond precision, etc + 'o': "1", // %o = month of year (1 digit) (1-12) + 'i': "4", // %i = minute (1 digit) + 's': "5", // %s = seconds (1 digit) + 'N': "999999999", // .%N = fraction of seconds - up to nanoseconds (9 digits) - milli/micro/nano } +// some additional options for timezone +// such as allowing colon in timezone offset and specifying the seconds var timezoneOptions = map[string]string{ - "Z:M": "Z07:00", - "Z:S": "Z07:00:00", - "ZH": "Z07", - "ZM": "Z0700", - "ZS": "Z070000", + "z": "Z0700", // %z = timezone offset in +-hhmm / +-(2 digit hour)(2 digit minute) +0500, -0600 etc + "z:M": "Z07:00", // %z:M = timezone offset(+-hh:mm) / +-(2 digit hour):(2 digit minute) +05:00, -06:00 etc + "z:S": "Z07:00:00", // %z:S = timezone offset(+-hh:mm:ss) / +-(2 digit hour):(2 digit minute):(2 digit second) +05:20:00, -06:30:00 etc + "zH": "Z07", // %zH = timezone offset(+-hh) / +-(2 digit hour) +05, -06 etc + "zS": "Z070000", // %zS = timezone offset(+-hhmmss) / +-(2 digit hour)(2 digit minute)(2 digit second) +052000, -063000 etc } type DateTimeParser struct { @@ -71,24 +80,24 @@ func New(layouts []string) *DateTimeParser { } } -func checkTZOptions(formatString string, idx int) (string, int, error) { - key := "Z" - if idx+1 >= len(formatString) { - return "", 0, fmt.Errorf("invalid format string, expected character after " + string(timezoneSpecifier)) - } - if formatString[idx+1] == ':' { - // check if there is a character after the colon - if idx+2 >= len(formatString) { - return "", 0, fmt.Errorf("invalid format string, expected character after colon") +func checkTZOptions(formatString string, idx int) (string, int) { + // idx is pointing to % + // idx + 1 is pointing to z + if idx+2 < len(formatString) { + if formatString[idx+2] == ':' { + // check if there is a character after the colon + if idx+3 < len(formatString) && (formatString[idx+3] == 'M' || formatString[idx+3] == 'S') { + return
timezoneOptions[fmt.Sprintf("z:%s", string(formatString[idx+3]))], idx + 4 + } + // %z: at end of input OR %z: followed by a character other than M/S detected; return the default layout Z0700 and increment idx by 2 to print : literally + return timezoneOptions["z"], idx + 2 + } else if formatString[idx+2] == 'H' || formatString[idx+2] == 'S' { + // %zH or %zS detected; return the layouts Z07 / Z070000 and increment idx by 3 to point to the next character + // after %zH or %zS + return timezoneOptions[fmt.Sprintf("z%s", string(formatString[idx+2]))], idx + 3 } - key += ":" - idx++ - } - key += string(formatString[idx+1]) - if layout, ok := timezoneOptions[key]; ok { - return layout, idx + 2, nil } - return "", 0, fmt.Errorf("invalid format string, unknown timezone specifier: " + key) + return timezoneOptions["z"], idx + 2 } func parseFormatString(formatString string) (string, error) { @@ -96,9 +105,9 @@ func parseFormatString(formatString string) (string, error) { // iterate over the format string and replace the format specifiers with // the corresponding golang constants for idx := 0; idx < len(formatString); { - // check if the character is a format specifier + // check if the character is a format delimiter (%) if formatString[idx] == formatDelimiter { - // check if there is a character after the format specifier + // check if there is a character after the format delimiter (%) if idx+1 >= len(formatString) { return "", fmt.Errorf("invalid format string, expected character after " + string(formatDelimiter)) } @@ -106,15 +115,11 @@ func parseFormatString(formatString string) (string, error) { if layout, ok := formatSpecifierToLayout[formatSpecifier]; ok { dateTimeLayout.WriteString(layout) idx += 2 - } else if formatSpecifier == timezoneSpecifier { + } else if formatSpecifier == 'z' { // did not find a valid specifier // check if it is for timezone var tzLayout string - var err error - tzLayout, idx, err = checkTZOptions(formatString, idx+1) - if err != nil { - return "", err - } + tzLayout, idx =
checkTZOptions(formatString, idx) dateTimeLayout.WriteString(tzLayout) } else { return "", fmt.Errorf("invalid format string, unknown format specifier: " + string(formatSpecifier)) diff --git a/analysis/datetime/percent/percent_test.go b/analysis/datetime/percent/percent_test.go index 560083ff2..9856a7640 100644 --- a/analysis/datetime/percent/percent_test.go +++ b/analysis/datetime/percent/percent_test.go @@ -27,53 +27,78 @@ func TestConversionFromPercentStyle(t *testing.T) { }{ { input: "%Y-%m-%d", - output: "2006-1-2", + output: "2006-01-02", err: nil, }, { - input: "%Y/%M%%%%%DT%H%i:%S", - output: "2006/01%%02T034:05", + input: "%Y/%m%%%%%dT%H%M:%S", + output: "2006/01%%02T1504:05", err: nil, }, { - input: "%Y-%M-%DT%O:%I:%S%ZM", - output: "2006-01-02T15:04:05Z0700", + input: "%Y-%m-%dT%H:%M:%S %Z%z", + output: "2006-01-02T15:04:05 MSTZ0700", err: nil, }, { - input: "%B %D, %Y %H:%I %P %Z:M", - output: "January 02, 2006 03:04 pm Z07:00", + input: "%B %e, %Y %l:%i %P %z:M", + output: "January 2, 2006 3:4 pm Z07:00", err: nil, }, { - input: "Hour %O Minute %iseconds %S%N Timezone:%Z:S, Weekday %a; Day %D Month %b, Year %y", - output: "Hour 15 Minute 4seconds 05.999999999 Timezone:Z07:00:00, Weekday Mon; Day 02 Month Jan, Year 06", + input: "Hour %H Minute %Mseconds %S.%N Timezone:%Z:S, Weekday %a; Day %d Month %b, Year %y", + output: "Hour 15 Minute 04seconds 05.999999999 Timezone:MST:S, Weekday Mon; Day 02 Month Jan, Year 06", err: nil, }, { - input: "%Y-%M-%D%T%O:%I:%S%ZM", - output: "", - err: fmt.Errorf("invalid format string, unknown format specifier: T"), + input: "%Y-%m-%dT%H:%M:%S.%N", + output: "2006-01-02T15:04:05.999999999", + err: nil, }, { - input: "%Y-%M-%DT%O:%I%S%ZM%", - output: "", - err: fmt.Errorf("invalid format string, invalid format string, expected character after %%"), + input: "%H:%M:%S %Z %z", + output: "15:04:05 MST Z0700", + err: nil, }, { - input: "%Y-%M-%DT%O:%I:%S%Z", - output: "", - err: fmt.Errorf("invalid format string, expected 
character after Z"), + input: "%H:%M:%S %Z %z:", + output: "15:04:05 MST Z0700:", + err: nil, + }, + { + input: "%H:%M:%S %Z %z:M", + output: "15:04:05 MST Z07:00", + err: nil, }, { - input: "%Y-%M-%DT%O:%I:%S%Z:", + input: "%H:%M:%S %Z %z:A", + output: "15:04:05 MST Z0700:A", + err: nil, + }, + { + input: "%H:%M:%S %Z %zM", + output: "15:04:05 MST Z0700M", + err: nil, + }, + { + input: "%H:%M:%S %Z %zS", + output: "15:04:05 MST Z070000", + err: nil, + }, + { + input: "%H:%M:%S %Z %z%Z %zS%z:%zH", + output: "15:04:05 MST Z0700MST Z070000Z0700:Z07", + err: nil, + }, + { + input: "%Y-%m-%d%T%H:%M:%S %ZM", output: "", - err: fmt.Errorf("invalid format string, expected character after colon"), + err: fmt.Errorf("invalid format string, unknown format specifier: T"), }, { - input: "%O:%I:%S%Z%H:%M:%S", + input: "%Y-%m-%dT%H:%M:%S %ZM%", output: "", - err: fmt.Errorf("invalid format string, unknown timezone specifier: Z%%"), + err: fmt.Errorf("invalid format string, invalid format string, expected character after %%"), }, } for _, test := range tests { diff --git a/config/config.go b/config/config.go index 2f6df4f4d..acd2cbeaa 100644 --- a/config/config.go +++ b/config/config.go @@ -70,7 +70,9 @@ import ( // date time parsers _ "github.com/blevesearch/bleve/v2/analysis/datetime/flexible" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/javatime" _ "github.com/blevesearch/bleve/v2/analysis/datetime/optional" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/percent" _ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds" _ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" From 659d8692211bc2a78a19c4ef6ac4bc112dc383e2 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 13 Sep 2023 21:00:33 +0530 Subject: [PATCH 7/9] comment typos --- analysis/datetime/javatime/javatime.go | 25 ++++++++++++++----------- analysis/datetime/percent/percent.go | 1 + 2 
files changed, 15 insertions(+), 11 deletions(-) diff --git a/analysis/datetime/javatime/javatime.go b/analysis/datetime/javatime/javatime.go index 0b525fa16..0c0e1472f 100644 --- a/analysis/datetime/javatime/javatime.go +++ b/analysis/datetime/javatime/javatime.go @@ -39,21 +39,21 @@ var textLiteralDelimiter byte = '\'' // single quote // - 'G' for era, like AD or BC. // - 'W' for week of month. // - 'D' for day of year. -// So date strings with date elements cannot be parsed. +// So date strings with these date elements cannot be parsed. var timeElementToLayout = map[byte]map[int]string{ 'M': { - 4: "January", - 3: "Jan", // MMM = short month name - 2: "01", // MM = month of year (2 digits) (01-12) - 1: "1", // M = month of year (1 digit) (1-12) + 4: "January", // MMMM = full month name + 3: "Jan", // MMM = short month name + 2: "01", // MM = month of year (2 digits) (01-12) + 1: "1", // M = month of year (1 digit) (1-12) }, 'd': { 2: "02", // dd = day of month (2 digits) (01-31) 1: "2", // d = day of month (1 digit) (1-31) }, 'a': { - 2: "pm", // PM = PM/AM - 1: "PM", // PM = PM/AM + 2: "pm", // aa = pm/am + 1: "PM", // a = PM/AM }, 'H': { 2: "15", // HH = hour (24 hour clock) (2 digits) @@ -70,15 +70,15 @@ var timeElementToLayout = map[byte]map[int]string{ // timezone offsets from UTC below 'X': { - 5: "Z07:00:00", // XXXXXX = timezone offset (+-hh:mm:ss) - 4: "Z070000", // XXXXX = timezone offset (+-hhmmss) + 5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss) + 4: "Z070000", // XXXX = timezone offset (+-hhmmss) 3: "Z07:00", // XXX = timezone offset (+-hh:mm) 2: "Z0700", // XX = timezone offset (+-hhmm) 1: "Z07", // X = timezone offset (+-hh) }, 'x': { - 5: "-07:00:00", // xxxxxx = timezone offset (+-hh:mm:ss) - 4: "-070000", // xxxxx = timezone offset (+-hhmmss) + 5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss) + 4: "-070000", // xxxx = timezone offset (+-hhmmss) 3: "-07:00", // xxx = timezone offset (+-hh:mm) 2: "-0700", // xx = timezone offset 
(+-hhmm) 1: "-07", // x = timezone offset (+-hh) @@ -191,6 +191,9 @@ func parseJavaString(layout string) (string, error) { } case 'S': // fraction of second + // .SSS = millisecond + // .SSSSSS = microsecond + // .SSSSSSSSS = nanosecond if count > 9 { return "", invalidFormatError(character, count) } diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index 39e62ce92..6f05f8a4a 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -62,6 +62,7 @@ var formatSpecifierToLayout = map[byte]string{ // some additional options for timezone // such as allowing colon in timezone offset and specifying the seconds +// timezone offsets are from UTC var timezoneOptions = map[string]string{ "z": "Z0700", // %z = timezone offset in +-hhmm / +-(2 digit hour)(2 digit minute) +0500, -0600 etc "z:M": "Z07:00", // %z:M = timezone offset(+-hh:mm) / +-(2 digit hour):(2 digit minute) +05:00, -06:00 etc From 0b1144e91b18db7aff5385e7eef081d37307f0ae Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Thu, 14 Sep 2023 02:14:57 +0530 Subject: [PATCH 8/9] change java to iso --- .../{javatime/javatime.go => iso/iso.go} | 10 +- .../javatime_test.go => iso/iso_test.go} | 21 +-- analysis/datetime/percent/percent.go | 43 +++-- config/config.go | 2 +- query.go | 34 ++++ search_test.go | 149 +++++++++++++++++- 6 files changed, 220 insertions(+), 39 deletions(-) rename analysis/datetime/{javatime/javatime.go => iso/iso.go} (97%) rename analysis/datetime/{javatime/javatime_test.go => iso/iso_test.go} (85%) diff --git a/analysis/datetime/javatime/javatime.go b/analysis/datetime/iso/iso.go similarity index 97% rename from analysis/datetime/javatime/javatime.go rename to analysis/datetime/iso/iso.go index 0c0e1472f..cd75b1036 100644 --- a/analysis/datetime/javatime/javatime.go +++ b/analysis/datetime/iso/iso.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the 
License. -package javatime +package iso import ( "fmt" @@ -23,11 +23,11 @@ import ( "github.com/blevesearch/bleve/v2/registry" ) -const Name = "javastyle" +const Name = "isostyle" var textLiteralDelimiter byte = '\'' // single quote -// java style date strings are represented in +// ISO style date strings are represented in // https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html // // Some format specifiers are not specified in go time package, such as: @@ -121,7 +121,7 @@ func invalidFormatError(character byte, count int) error { return fmt.Errorf("invalid format string, unknown format specifier: " + strings.Repeat(string(character), count)) } -func parseJavaString(layout string) (string, error) { +func parseISOString(layout string) (string, error) { var dateTimeLayout strings.Builder for idx := 0; idx < len(layout); { @@ -228,7 +228,7 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca for _, layout := range layouts { layoutStr, ok := layout.(string) if ok { - layout, err := parseJavaString(layoutStr) + layout, err := parseISOString(layoutStr) if err != nil { return nil, err } diff --git a/analysis/datetime/javatime/javatime_test.go b/analysis/datetime/iso/iso_test.go similarity index 85% rename from analysis/datetime/javatime/javatime_test.go rename to analysis/datetime/iso/iso_test.go index 9195e40b0..2c172f4b1 100644 --- a/analysis/datetime/javatime/javatime_test.go +++ b/analysis/datetime/iso/iso_test.go @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package javatime +package iso import ( "fmt" "testing" ) -func TestConversionFromJavaStyle(t *testing.T) { +func TestConversionFromISOStyle(t *testing.T) { tests := []struct { input string output string @@ -43,6 +43,7 @@ func TestConversionFromJavaStyle(t *testing.T) { { input: "MMMM dd yyyy', 'HH:mm:ss.SSS", output: "January 02 2006, 15:04:05.000", + err: nil, }, { input: "h 'o'''' clock' a, XXX", @@ -55,13 +56,8 @@ func TestConversionFromJavaStyle(t *testing.T) { err: nil, }, { - input: "E MMM d H:m:s z Y", - output: "Mon Jan 2 15:4:5 MST 2006", - err: nil, - }, - { - input: "E MMM d H:m:s z Y", - output: "Mon Jan 2 15:4:5 MST 2006", + input: "E MMM d H:mm:ss z Y", + output: "Mon Jan 2 15:04:05 MST 2006", err: nil, }, { @@ -74,9 +70,14 @@ func TestConversionFromJavaStyle(t *testing.T) { output: "", err: fmt.Errorf("invalid format string, expected text literal delimiter: '"), }, + { + input: "MMMMM dd yyyy', 'HH:mm:ss.SSS", + output: "", + err: fmt.Errorf("invalid format string, unknown format specifier: MMMMM"), + }, } for _, test := range tests { - out, err := parseJavaString(test.input) + out, err := parseISOString(test.input) if err != nil && test.err == nil || err == nil && test.err != nil { t.Fatalf("expected error %v, got error %v", test.err, err) } diff --git a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go index 6f05f8a4a..7e8202f2b 100644 --- a/analysis/datetime/percent/percent.go +++ b/analysis/datetime/percent/percent.go @@ -27,29 +27,28 @@ const Name = "percentstyle" var formatDelimiter byte = '%' +// format specifiers as per strftime in the C standard library +// https://man7.org/linux/man-pages/man3/strftime.3.html var formatSpecifierToLayout = map[byte]string{ - // format specifiers as per strftime in the C standard library - // https://man7.org/linux/man-pages/man3/strftime.3.html - - formatDelimiter: string(formatDelimiter), - 'a': "Mon", // %a = short weekday name - 'A': "Monday", // %A = full weekday name - 'd': 
"02", // %d = day of month (2 digits) (01-31) - 'e': "2", // %e = day of month (1 digit) (1-31) - 'b': "Jan", // %b = short month name - 'B': "January", // %B = full month name - 'm': "01", // %m = month of year (2 digits) (01-12) - 'y': "06", // %y = year without century - 'Y': "2006", // %Y = year with century - 'H': "15", // %H = hour (24 hour clock) (2 digits) - 'I': "03", // %I = hour (12 hour clock) (2 digits) - 'l': "3", // %l = hour (12 hour clock) (1 digit) - 'p': "PM", // %p = PM/AM - 'P': "pm", // %P = pm/am (lowercase) - 'M': "04", // %M = minute (2 digits) - 'S': "05", // %S = seconds (2 digits) - 'f': "999999", // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro - 'Z': "MST", // %Z = timezone name (GMT, JST, UTC etc) + formatDelimiter: string(formatDelimiter), // %% = % (literal %) + 'a': "Mon", // %a = short weekday name + 'A': "Monday", // %A = full weekday name + 'd': "02", // %d = day of month (2 digits) (01-31) + 'e': "2", // %e = day of month (1 digit) (1-31) + 'b': "Jan", // %b = short month name + 'B': "January", // %B = full month name + 'm': "01", // %m = month of year (2 digits) (01-12) + 'y': "06", // %y = year without century + 'Y': "2006", // %Y = year with century + 'H': "15", // %H = hour (24 hour clock) (2 digits) + 'I': "03", // %I = hour (12 hour clock) (2 digits) + 'l': "3", // %l = hour (12 hour clock) (1 digit) + 'p': "PM", // %p = PM/AM + 'P': "pm", // %P = pm/am (lowercase) + 'M': "04", // %M = minute (2 digits) + 'S': "05", // %S = seconds (2 digits) + 'f': "999999", // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro + 'Z': "MST", // %Z = timezone name (GMT, JST, UTC etc) // %z is present in timezone options // some additional options not in strftime to support additional options such as diff --git a/config/config.go b/config/config.go index acd2cbeaa..492b86f74 100644 --- a/config/config.go +++ b/config/config.go @@ -70,7 +70,7 @@ import ( // date time parsers _ 
"github.com/blevesearch/bleve/v2/analysis/datetime/flexible" - _ "github.com/blevesearch/bleve/v2/analysis/datetime/javatime" + _ "github.com/blevesearch/bleve/v2/analysis/datetime/iso" _ "github.com/blevesearch/bleve/v2/analysis/datetime/optional" _ "github.com/blevesearch/bleve/v2/analysis/datetime/percent" _ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" diff --git a/query.go b/query.go index e18026ec1..3af750a06 100644 --- a/query.go +++ b/query.go @@ -68,6 +68,40 @@ func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusi return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive) } +// NewDateRangeStringQuery creates a new Query for ranges +// of date values. +// Date strings are parsed using the DateTimeParser set using +// +// the DateRangeStringQuery.SetDateTimeParser() method. +// +// If no DateTimeParser is set, then the +// +// top-level config.QueryDateTimeParser +// +// is used. +func NewDateRangeStringQuery(start, end string) *query.DateRangeStringQuery { + return query.NewDateRangeStringQuery(start, end) +} + +// NewDateRangeStringQuery creates a new Query for ranges +// of date values. +// Date strings are parsed using the DateTimeParser set using +// +// the DateRangeStringQuery.SetDateTimeParser() method. +// +// this DateTimeParser is a custom date time parser defined in the index mapping, +// using AddCustomDateTimeParser() method. +// If no DateTimeParser is set, then the +// +// top-level config.QueryDateTimeParser +// +// is used. +// Either, but not both endpoints can be nil. +// startInclusive and endInclusive control inclusion of the endpoints. +func NewDateRangeInclusiveStringQuery(start, end string, startInclusive, endInclusive *bool) *query.DateRangeStringQuery { + return query.NewDateRangeStringInclusiveQuery(start, end, startInclusive, endInclusive) +} + // NewDisjunctionQuery creates a new compound Query. // Result documents satisfy at least one Query. 
func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery { diff --git a/search_test.go b/search_test.go index 12ebc7bd4..37da8da0a 100644 --- a/search_test.go +++ b/search_test.go @@ -30,6 +30,8 @@ import ( html_char_filter "github.com/blevesearch/bleve/v2/analysis/char/html" regexp_char_filter "github.com/blevesearch/bleve/v2/analysis/char/regexp" "github.com/blevesearch/bleve/v2/analysis/datetime/flexible" + "github.com/blevesearch/bleve/v2/analysis/datetime/iso" + "github.com/blevesearch/bleve/v2/analysis/datetime/percent" "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" @@ -2740,7 +2742,7 @@ func TestDateRangeStringQuery(t *testing.T) { for _, dtq := range testQueries { var err error - dateQuery := query.NewDateRangeStringInclusiveQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd) + dateQuery := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd) dateQuery.SetDateTimeParser(dtq.dateTimeParser) dateQuery.SetField(dtq.field) @@ -3229,3 +3231,148 @@ func TestDateRangeTimestampQueries(t *testing.T) { } } } + +func TestPercentAndIsoStyleDates(t *testing.T) { + percentName := percent.Name + isoName := iso.Name + + imap := mapping.NewIndexMapping() + percentConfig := map[string]interface{}{ + "type": percentName, + "layouts": []interface{}{ + "%Y/%m/%d %l:%M%p", // doc 1 + "%d/%m/%Y %H:%M:%S", // doc 2 + "%Y-%m-%dT%H:%M:%S%z", // doc 3 + "%d %B %y %l%p %Z", // doc 4 + "%Y; %b %d (%a) %I:%M:%S.%N%P %z", // doc 5 + }, + } + isoConfig := map[string]interface{}{ + "type": isoName, + "layouts": []interface{}{ + "yyyy/MM/dd h:mma", // doc 1 + "dd/MM/yyyy HH:mm:ss", // doc 2 + "yyyy-MM-dd'T'HH:mm:ssXX", // doc 3 + "dd MMMM yy ha z", // doc 4 + "yyyy; MMM dd (EEE) hh:mm:ss.SSSSSaa xx", // doc 5 + }, + } + + err := 
imap.AddCustomDateTimeParser("percentDate", percentConfig) + if err != nil { + t.Fatal(err) + } + err = imap.AddCustomDateTimeParser("isoDate", isoConfig) + if err != nil { + t.Fatal(err) + } + + percentField := mapping.NewDateTimeFieldMapping() + percentField.DateFormat = "percentDate" + + isoField := mapping.NewDateTimeFieldMapping() + isoField.DateFormat = "isoDate" + + imap.DefaultMapping.AddFieldMappingsAt("percentDate", percentField) + imap.DefaultMapping.AddFieldMappingsAt("isoDate", isoField) + + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, imap) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + documents := map[string]map[string]interface{}{ + "doc1": { + "percentDate": "2001/08/20 6:00PM", + "isoDate": "2001/08/20 6:00PM", + }, + "doc2": { + "percentDate": "20/08/2001 18:05:00", + "isoDate": "20/08/2001 18:05:00", + }, + "doc3": { + "percentDate": "2001-08-20T18:10:00Z", + "isoDate": "2001-08-20T18:10:00Z", + }, + "doc4": { + "percentDate": "20 August 01 6PM UTC", + "isoDate": "20 August 01 6PM UTC", + }, + "doc5": { + "percentDate": "2001; Aug 20 (Mon) 06:15:15.23456pm +0000", + "isoDate": "2001; Aug 20 (Mon) 06:15:15.23456pm +0000", + }, + } + + batch := idx.NewBatch() + for docID, doc := range documents { + err := batch.Index(docID, doc) + if err != nil { + t.Fatal(err) + } + } + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + + type testStruct struct { + start string + end string + field string + } + + for _, field := range []string{"percentDate", "isoDate"} { + testQueries := []testStruct{ + { + start: "2001/08/20 6:00PM", + end: "2001/08/20 6:20PM", + field: field, + }, + { + start: "20/08/2001 18:00:00", + end: "20/08/2001 18:20:00", + field: field, + }, + { + start: "2001-08-20T18:00:00Z", + end: "2001-08-20T18:20:00Z", + field: field, + }, + { + start: "20 August 01 6PM UTC", + end: "20 
August 01 7PM UTC", + field: field, + }, + { + start: "2001; Aug 20 (Mon) 06:00:00.00000pm +0000", + end: "2001; Aug 20 (Mon) 06:20:20.00000pm +0000", + field: field, + }, + } + includeStart := true + includeEnd := true + for _, dtq := range testQueries { + drq := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &includeStart, &includeEnd) + drq.SetField(dtq.field) + drq.SetDateTimeParser(field) + sr := NewSearchRequest(drq) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if len(res.Hits) != 5 { + t.Fatalf("expected %d hits, got %d", 5, len(res.Hits)) + } + } + } +} From c6171d4f57b5721addd0566d905e6cdcfc06454d Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 13 Sep 2023 15:12:32 -0600 Subject: [PATCH 9/9] Format license comment --- analysis/datetime/iso/iso_test.go | 2 +- analysis/datetime/percent/percent_test.go | 2 +- analysis/datetime/sanitized/sanitized_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/datetime/iso/iso_test.go b/analysis/datetime/iso/iso_test.go index 2c172f4b1..52681d196 100644 --- a/analysis/datetime/iso/iso_test.go +++ b/analysis/datetime/iso/iso_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Couchbase, Inc. +// Copyright (c) 2023 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/analysis/datetime/percent/percent_test.go b/analysis/datetime/percent/percent_test.go index 9856a7640..5b6932160 100644 --- a/analysis/datetime/percent/percent_test.go +++ b/analysis/datetime/percent/percent_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Couchbase, Inc. +// Copyright (c) 2023 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/analysis/datetime/sanitized/sanitized_test.go b/analysis/datetime/sanitized/sanitized_test.go index 0bf3b15d0..d62e20aad 100644 --- a/analysis/datetime/sanitized/sanitized_test.go +++ b/analysis/datetime/sanitized/sanitized_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Couchbase, Inc. +// Copyright (c) 2023 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.