From da83b592bb7784c9704afbbf92fbb63507533866 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 13 Dec 2019 16:27:24 +0100 Subject: [PATCH] feat(normalizers): add optional normalizer for keyword fields --- .../source_layer_sourceid_filtering.js | 32 +++++++++--------- mappings/partial/keyword.json | 1 + mappings/partial/keyword_with_doc_values.json | 3 +- settings.js | 9 +++++ test/compile.js | 23 +++++++++++++ test/fixtures/expected.json | 33 +++++++++++++++++-- test/settings.js | 16 +++++++++ 7 files changed, 98 insertions(+), 19 deletions(-) diff --git a/integration/source_layer_sourceid_filtering.js b/integration/source_layer_sourceid_filtering.js index dbdbceae..a4be2b6e 100644 --- a/integration/source_layer_sourceid_filtering.js +++ b/integration/source_layer_sourceid_filtering.js @@ -57,6 +57,22 @@ module.exports.tests.source_filter = function(test, common){ }); }); + // case insensitive + suite.assert( function( done ){ + suite.client.search({ + index: suite.props.index, + type: config.schema.typeName, + body: { query: { + term: { + source: 'OSM' + } + }} + }, function( err, res ){ + t.equal( res.hits.total, 2 ); + done(); + }); + }); + // find all 'address' layers suite.assert( function( done ){ suite.client.search({ @@ -104,22 +120,6 @@ module.exports.tests.source_filter = function(test, common){ }); }); - // case sensitive - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { - term: { - source: 'OSM' - } - }} - }, function( err, res ){ - t.equal( res.hits.total, 0 ); - done(); - }); - }); - // keyword analysis - no partial matching suite.assert( function( done ){ suite.client.search({ diff --git a/mappings/partial/keyword.json b/mappings/partial/keyword.json index 5f325492..49f379cf 100644 --- a/mappings/partial/keyword.json +++ b/mappings/partial/keyword.json @@ -1,4 +1,5 @@ { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false } diff --git a/mappings/partial/keyword_with_doc_values.json b/mappings/partial/keyword_with_doc_values.json index 16a0e08c..edb254b9 100644 --- a/mappings/partial/keyword_with_doc_values.json +++ b/mappings/partial/keyword_with_doc_values.json @@ -1,3 +1,4 @@ { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" } diff --git a/settings.js b/settings.js index f55c41b9..ff510363 100644 --- a/settings.js +++ b/settings.js @@ -34,6 +34,15 @@ function generate(){ "pattern": "[\\s,/\\\\-]+" } }, + "normalizer": { + "peliasKeywordNormalizer": { + "type": "custom", + "filter": [ + "lowercase", + "icu_folding" + ] + } + }, "analyzer": { "peliasAdmin": { "type": "custom", diff --git a/test/compile.js b/test/compile.js index 705c896f..033e5170 100644 --- a/test/compile.js +++ b/test/compile.js @@ -99,6 +99,29 @@ module.exports.tests.analyzers = function (test, common) { }); }; +// note: this test is commented out for now because it's valid for some keyword +// fields such as bounding_box and addendum to use the null normalizer, but it's +// not easy to test because it's not possible to specify them as null in the mapping. + +// ensure "normalizer" is set for keyword fields +// module.exports.tests.normalizers = function (test, common) { +// test('normalizers: ensure "normalizer" is set', function (t) { +// const keywordFields = []; + +// forEachDeep(schema, (value, key) => { +// if (!_.isPlainObject(value)) { return; } +// if (_.get(value, 'type', '') !== 'keyword') { return; } +// keywordFields.push({ key: key, value: value }); +// }); + +// keywordFields.forEach(field => { +// t.true(_.has(field.value, 'normalizer'), `normalizer not set on ${field.key}`) +// }) + +// t.end(); +// }); +// }; + // current schema (compiled) - requires schema to be copied and settings to // be regenerated from a fixture in order to pass in CI environments. module.exports.tests.current_schema = function(test, common) { diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 822d94d3..3284c849 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -11,6 +11,16 @@ "pattern": "[\\s,/\\\\-]+" } }, + "normalizer": { + "peliasKeywordNormalizer": { + "type": "custom", + "filter": [ + "lowercase", + "icu_folding", + "trim" + ] + } + }, "analyzer": { "peliasAdmin": { "type": "custom", @@ -575,10 +585,12 @@ "doc": { "properties": { "source": { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" }, "layer": { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" }, "name": { "type": "object", @@ -656,6 +668,7 @@ }, "continent_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "ocean": { @@ -686,6 +699,7 @@ }, "ocean_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "empire": { @@ -716,6 +730,7 @@ }, "empire_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "country": { @@ -746,6 +761,7 @@ }, "country_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "dependency": { @@ -776,6 +792,7 @@ }, "dependency_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "marinearea": { @@ -806,6 +823,7 @@ }, "marinearea_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "macroregion": { @@ -836,6 +854,7 @@ }, "macroregion_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "region": { @@ -866,6 +885,7 @@ }, "region_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "macrocounty": { @@ -896,6 +916,7 @@ }, "macrocounty_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "county": { @@ -926,6 +947,7 @@ }, "county_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "locality": { @@ -956,6 +978,7 @@ }, "locality_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "borough": { @@ -986,6 +1009,7 @@ }, "borough_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "localadmin": { @@ -1016,6 +1040,7 @@ }, "localadmin_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "neighbourhood": { @@ -1046,6 +1071,7 @@ }, "neighbourhood_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "postalcode": { @@ -1074,6 +1100,7 @@ }, "postalcode_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false } } @@ -1090,10 +1117,12 @@ }, "source_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "category": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "population": { diff --git a/test/settings.js b/test/settings.js index 360370af..19f65a26 100644 --- a/test/settings.js +++ b/test/settings.js @@ -48,6 +48,22 @@ module.exports.tests.analysis = function(test, common) { }); }; +// -- normalizers -- + +module.exports.tests.peliasKeywordNormalizer = function (test, common) { + test('has pelias keyword normalizer', function (t) { + var s = settings(); + t.equal(typeof s.analysis.normalizer.peliasKeywordNormalizer, 'object', 'there is a pelias keyword normalizer'); + var normalizer = s.analysis.normalizer.peliasKeywordNormalizer; + t.equal(normalizer.type, 'custom', 'custom normalizer'); + t.deepEqual(normalizer.filter, [ + "lowercase", + "icu_folding" + ]); + t.end(); + }); +}; + // -- analyzers -- module.exports.tests.peliasAdminAnalyzer = function(test, common) {