-
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(post): add alphanumeric postcodes post-processing script
- Loading branch information
1 parent
6cfffc0
commit 182b942
Showing
5 changed files
with
137 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
const _ = require('lodash'); | ||
// layer name a document must report via getLayer() for this script to apply | ||
const ADDRESS_LAYER_NAME = 'address'; | ||
// matches exactly four digits, optional whitespace, then exactly two ASCII letters; | ||
// capture group 1 is the numeric part and capture group 2 is the alpha part | ||
const ALPHANUMERIC_POSTCODE = /^([0-9]{4})\s*([A-Za-z]{2})$/; | ||
|
||
/** | ||
* Alphanumeric postcodes post-processing script ensures that both the expanded | ||
* and contracted version of alphanumeric postcodes are indexed. | ||
* | ||
* Without this script a postcode such as '1383GN' would not be matched to the | ||
* query '1383'. | ||
* | ||
* The script is intended to detect these alphanumeric postcodes and index both | ||
* permutations, ie. '1383GN' = ['1383GN', '1383 GN']. | ||
* | ||
* The inverse case should also be covered. ie. '1383 GN' = ['1383 GN', '1383GN']. | ||
* | ||
* Note: the regex is currently restrictive by design. The UK, for instance, uses | ||
* alphanumeric postcodes in the format 'E81DN', which could cause errors when split | ||
* with this method, so they are currently ignored. Future work should consider global | ||
* postcode formats. | ||
* | ||
* Note: this script is intended to run *before* the 'deduplication' post processing | ||
* script so that prior aliases don't generate duplicate terms. | ||
*/ | ||
|
||
/**
 * Adds the complementary form of an alphanumeric postcode as a 'zip' alias:
 * the contracted form ('1383GN') when the postcode contains whitespace,
 * otherwise the expanded form ('1383 GN').
 *
 * Does nothing when the doc is not in the address layer, when no non-empty
 * string postcode is set, or when the postcode does not match
 * ALPHANUMERIC_POSTCODE.
 *
 * @param {object} doc - document exposing getLayer(), getAddress() and
 *   setAddressAlias()
 * @returns {undefined}
 */
function postcodes( doc ){

  // only apply to docs from the address layer
  if( doc.getLayer() !== ADDRESS_LAYER_NAME ){ return; }

  // ensure postcode is set
  const postcode = doc.getAddress('zip');
  if( !_.isString(postcode) || _.isEmpty(postcode) ){ return; }

  // ensure postcode is alphanumeric
  // note: String.prototype.match() returns null (not an empty array) when the
  // pattern does not match, so the null check must come before reading .length
  const matches = postcode.match(ALPHANUMERIC_POSTCODE);
  if( !matches || matches.length !== 3 ){ return; }

  // split the match into the full matched text and its two capture groups
  const [ match, numeric, alpha ] = matches;

  // detect if the existing postcode is expanded or not
  const isExpanded = /\s/.test(match);
  if( isExpanded ){
    doc.setAddressAlias('zip', `${numeric}${alpha}`); // add contracted form as alias
  } else {
    doc.setAddressAlias('zip', `${numeric} ${alpha}`); // add expanded form as alias
  }
}
|
||
module.exports = postcodes; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
const Document = require('../../Document'); | ||
const postcodes = require('../../post/alphanumeric_postcodes'); | ||
|
||
module.exports.tests = {}; | ||
|
||
// Alias generation: each case sets a postcode in one form and expects the
// complementary form to be added as the only 'zip' alias.
module.exports.tests.alias = (test) => {
  const cases = [
    { name: 'expand', zip: '1383GN', expected: '1383 GN' },   // contracted -> expanded alias
    { name: 'contract', zip: '1383 GN', expected: '1383GN' }  // expanded -> contracted alias
  ];

  cases.forEach(({ name, zip, expected }) => {
    test(name, (t) => {
      const doc = new Document('mysource','address','myid');

      // running the script before a zip is set must not create an alias
      postcodes(doc);
      t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

      // once the postcode is set, the script adds the complementary form
      doc.setAddress('zip', zip);
      postcodes(doc);
      t.deepEqual(doc.getAddressAliases('zip'), [expected], 'alias set');

      t.end();
    });
  });
};
|
||
// No-op cases: the script must leave the 'zip' aliases untouched.
module.exports.tests.noop = function(test) {
  test('noop: invalid layer != "address"', function(t) {
    const doc = new Document('mysource','not_address','myid');

    // set postcode
    doc.setAddress('zip', '1383GN');

    // run the script under test; without this call the assertion below
    // would pass trivially and verify nothing
    postcodes(doc);

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });

  test('noop: postcode doesnt match regex', function(t) {
    const doc = new Document('mysource','address','myid');

    // set postcode
    doc.setAddress('zip', 'E81DN');

    // run the script under test; a non-matching postcode must be ignored
    // and must not throw
    postcodes(doc);

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });
};
|
||
// Runs every registered test group, prefixing each test name with this
// module's label before handing it to tape.
module.exports.all = function (tape, common) {

  const test = (name, testFunction) =>
    tape('post/alphanumeric_postcodes: ' + name, testFunction);

  const groups = module.exports.tests;
  for( const groupName in groups ){
    groups[groupName](test, common);
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters