Skip to content

Commit

Permalink
feat(normalizers): add optional normalizer for keyword fields
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Dec 16, 2019
1 parent 1098353 commit 7a674fa
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 19 deletions.
32 changes: 16 additions & 16 deletions integration/source_layer_sourceid_filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@ module.exports.tests.source_filter = function(test, common){
});
});

// case insensitive
// Runs a term query for the upper-case value 'OSM' against the `source` field
// and expects exactly 2 hits.
// NOTE(review): presumably this matches because `source` is mapped with
// peliasKeywordNormalizer (which includes a "lowercase" filter), so the query
// term is normalized the same way as the indexed values; confirm against the
// fixture documents indexed earlier in this test.
suite.assert( function( done ){
suite.client.search({
index: suite.props.index,
type: config.schema.typeName,
body: { query: {
term: {
source: 'OSM'
}
}}
}, function( err, res ){
// NOTE(review): `err` is not inspected here; if the search fails, `res` may be
// undefined and the assertion below would throw rather than report a failure.
t.equal( res.hits.total, 2 );
done();
});
});

// find all 'address' layers
suite.assert( function( done ){
suite.client.search({
Expand Down Expand Up @@ -104,22 +120,6 @@ module.exports.tests.source_filter = function(test, common){
});
});

// case sensitive
suite.assert( function( done ){
suite.client.search({
index: suite.props.index,
type: config.schema.typeName,
body: { query: {
term: {
source: 'OSM'
}
}}
}, function( err, res ){
t.equal( res.hits.total, 0 );
done();
});
});

// keyword analysis - no partial matching
suite.assert( function( done ){
suite.client.search({
Expand Down
1 change: 1 addition & 0 deletions mappings/partial/keyword.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
}
3 changes: 2 additions & 1 deletion mappings/partial/keyword_with_doc_values.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"type": "keyword"
"type": "keyword",
"normalizer": "peliasKeywordNormalizer"
}
9 changes: 9 additions & 0 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ function generate(){
"pattern": "[\\s,/\\\\-]+"
}
},
"normalizer": {
"peliasKeywordNormalizer": {
"type": "custom",
"filter": [
"lowercase",
"icu_folding"
]
}
},
"analyzer": {
"peliasAdmin": {
"type": "custom",
Expand Down
23 changes: 23 additions & 0 deletions test/compile.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ module.exports.tests.analyzers = function (test, common) {
});
};

// note: this test is commented out for now. Some keyword fields (such as
// bounding_box and addendum) may validly use the null normalizer, but that is
// hard to test because a mapping cannot explicitly specify a null normalizer.

// ensure "normalizer" is set for keyword fields
// module.exports.tests.normalizers = function (test, common) {
// test('normalizers: ensure "normalizer" is set', function (t) {
// const keywordFields = [];

// forEachDeep(schema, (value, key) => {
// if (!_.isPlainObject(value)) { return; }
// if (_.get(value, 'type', '') !== 'keyword') { return; }
// keywordFields.push({ key: key, value: value });
// });

// keywordFields.forEach(field => {
// t.true(_.has(field.value, 'normalizer'), `normalizer not set on ${field.key}`)
// })

// t.end();
// });
// };

// current schema (compiled) - requires schema to be copied and settings to
// be regenerated from a fixture in order to pass in CI environments.
module.exports.tests.current_schema = function(test, common) {
Expand Down
32 changes: 30 additions & 2 deletions test/fixtures/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
"pattern": "[\\s,/\\\\-]+"
}
},
"normalizer": {
"peliasKeywordNormalizer": {
"type": "custom",
"filter": [
"lowercase",
"icu_folding"
]
}
},
"analyzer": {
"peliasAdmin": {
"type": "custom",
Expand Down Expand Up @@ -575,10 +584,12 @@
"doc": {
"properties": {
"source": {
"type": "keyword"
"type": "keyword",
"normalizer": "peliasKeywordNormalizer"
},
"layer": {
"type": "keyword"
"type": "keyword",
"normalizer": "peliasKeywordNormalizer"
},
"name": {
"type": "object",
Expand Down Expand Up @@ -656,6 +667,7 @@
},
"continent_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"ocean": {
Expand Down Expand Up @@ -686,6 +698,7 @@
},
"ocean_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"empire": {
Expand Down Expand Up @@ -716,6 +729,7 @@
},
"empire_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"country": {
Expand Down Expand Up @@ -746,6 +760,7 @@
},
"country_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"dependency": {
Expand Down Expand Up @@ -776,6 +791,7 @@
},
"dependency_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"marinearea": {
Expand Down Expand Up @@ -806,6 +822,7 @@
},
"marinearea_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"macroregion": {
Expand Down Expand Up @@ -836,6 +853,7 @@
},
"macroregion_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"region": {
Expand Down Expand Up @@ -866,6 +884,7 @@
},
"region_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"macrocounty": {
Expand Down Expand Up @@ -896,6 +915,7 @@
},
"macrocounty_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"county": {
Expand Down Expand Up @@ -926,6 +946,7 @@
},
"county_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"locality": {
Expand Down Expand Up @@ -956,6 +977,7 @@
},
"locality_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"borough": {
Expand Down Expand Up @@ -986,6 +1008,7 @@
},
"borough_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"localadmin": {
Expand Down Expand Up @@ -1016,6 +1039,7 @@
},
"localadmin_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"neighbourhood": {
Expand Down Expand Up @@ -1046,6 +1070,7 @@
},
"neighbourhood_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"postalcode": {
Expand Down Expand Up @@ -1074,6 +1099,7 @@
},
"postalcode_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
}
}
Expand All @@ -1090,10 +1116,12 @@
},
"source_id": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"category": {
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
},
"population": {
Expand Down
16 changes: 16 additions & 0 deletions test/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,22 @@ module.exports.tests.analysis = function(test, common) {
});
};

// -- normalizers --

/**
 * Registers a test asserting that the generated settings expose a
 * `peliasKeywordNormalizer` under `analysis.normalizer`, that it is of type
 * "custom", and that its filter chain is exactly ["lowercase", "icu_folding"].
 *
 * @param {Function} test - test registration function: test(name, callback)
 * @param {Object} common - shared test helpers (not used by this test)
 */
module.exports.tests.peliasKeywordNormalizer = function (test, common) {
  test('has pelias keyword normalizer', (t) => {
    // generate the settings once and pull out the normalizer under test
    const keywordNormalizer = settings().analysis.normalizer.peliasKeywordNormalizer;
    const expectedFilters = ['lowercase', 'icu_folding'];

    t.equal(typeof keywordNormalizer, 'object', 'there is a pelias keyword normalizer');
    t.equal(keywordNormalizer.type, 'custom', 'custom normalizer');
    t.deepEqual(keywordNormalizer.filter, expectedFilters);
    t.end();
  });
};

// -- analyzers --

module.exports.tests.peliasAdminAnalyzer = function(test, common) {
Expand Down

0 comments on commit 7a674fa

Please sign in to comment.