Skip to content

Commit

Permalink
Merge pull request #1060 from pelias/microserviceify-libpostal
Browse files Browse the repository at this point in the history
Microserviceify libpostal
  • Loading branch information
trescube authored Nov 28, 2017
2 parents 330c706 + 9cb4118 commit 0cc1eed
Show file tree
Hide file tree
Showing 16 changed files with 965 additions and 378 deletions.
1 change: 1 addition & 0 deletions .npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock=false
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# base image
FROM pelias/libpostal_baseimage
FROM pelias/baseimage

# maintainer information
LABEL maintainer="pelias@mapzen.com"
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ The API recognizes the following properties under the top-level `api` key in you
|parameter|required|default|description|
|---|---|---|---|
|`host`|*yes*||specifies the url under which the http service is to run|
|`textAnalyzer`|*no*|*addressit*|can be either `libpostal` or `addressit` however will soon be **deprecated** and only `libpostal` will be supported going forward|
|`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries|
|`relativeScores`|*no*|true|if set to true, confidence scores will be normalized, realistically at this point setting this to false is not tested or desirable
|`accessLog`|*no*||name of the format to use for access logs; may be any one of the [predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.|
Expand All @@ -66,7 +65,6 @@ Example configuration file would look something like this:
"host": "localhost:3100/v1/",
"indexName": "foobar",
"relativeScores": true,
"textAnalyzer": "libpostal",
"services": {
"pip": {
"url": "http://mypipservice.com:3000"
Expand Down
105 changes: 91 additions & 14 deletions controller/libpostal.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,108 @@
const text_analyzer = require('pelias-text-analyzer');
const _ = require('lodash');
const iso3166 = require('iso3166-1');
const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');

function setup(should_execute) {
// mapping object from libpostal fields to pelias fields
var field_mapping = {
island: 'island',
category: 'category',
house: 'query',
house_number: 'number',
road: 'street',
suburb: 'neighbourhood',
city_district: 'borough',
city: 'city',
state_district: 'county',
state: 'state',
postcode: 'postalcode',
country: 'country'
};

// This controller calls the hosted libpostal service and converts the response
// to a generic format for later use. The hosted service returns an array like:
//
// ```
// [
// {
// label: 'house_number',
// value: '30'
// },
// {
// label: 'road',
// value: 'west 26th street'
// },
// {
// label: 'city',
// value: 'new york'
// },
// {
// label: 'state',
// value: 'ny'
// }
//]
// ```
//
// where `label` can be any of (currently):
// - house (generally interpreted as unknown, treated by pelias like a query term)
// - category (like "restaurants")
// - house_number
// - road
// - unit (apt or suite #)
// - suburb (like a neighbourhood)
// - city
// - city_district (like an NYC borough)
// - state_district (like a county)
// - state
// - postcode
// - country
//
// The Pelias query module is not concerned with unit.
//
function setup(libpostalService, should_execute) {
function controller( req, res, next ){
// bail early if req/res don't pass conditions for execution
if (!should_execute(req, res)) {
return next();
}

const initialTime = debugLog.beginTimer(req);
// parse text with query parser
const parsed_text = text_analyzer.parse(req.clean.text);

if (parsed_text !== undefined) {
// if a known ISO2 country was parsed, convert it to ISO3
if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) {
parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country));
libpostalService(req, (err, response) => {
if (err) {
// push err.message or err onto req.errors
req.errors.push( _.get(err, 'message', err) );

} else if (_.some(_.countBy(response, o => o.label), count => count > 1)) {
logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`);
return next();

} else if (_.isEmpty(response)) {
return next();

} else {
req.clean.parser = 'libpostal';
req.clean.parsed_text = response.reduce(function(o, f) {
if (field_mapping.hasOwnProperty(f.label)) {
o[field_mapping[f.label]] = f.value;
}

return o;
}, {});

if (_.has(req.clean.parsed_text, 'country') && iso3166.is2(_.toUpper(req.clean.parsed_text.country))) {
req.clean.parsed_text.country = iso3166.to3(_.toUpper(req.clean.parsed_text.country));
}

debugLog.push(req, {parsed_text: req.clean.parsed_text});

}

req.clean.parser = 'libpostal';
req.clean.parsed_text = parsed_text;
debugLog.push(req, {parsed_text: req.clean.parsed_text});
}
debugLog.stopTimer(req, initialTime);
return next();
debugLog.stopTimer(req, initialTime);
return next();

});

}

Expand Down
75 changes: 75 additions & 0 deletions controller/structured_libpostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
const _ = require('lodash');
const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');

// if there's a house_number in the libpostal response, return it
// otherwise return the postcode field (which may be undefined)
function findHouseNumberField(response) {
const house_number_field = response.find(f => f.label === 'house_number');

if (house_number_field) {
return house_number_field;
}

return response.find(f => f.label === 'postcode');

}

function setup(libpostalService, should_execute) {
function controller( req, res, next ){
// bail early if req/res don't pass conditions for execution
if (!should_execute(req, res)) {
return next();
}

const initialTime = debugLog.beginTimer(req);

libpostalService(req, (err, response) => {
if (err) {
// push err.message or err onto req.errors
req.errors.push( _.get(err, 'message', err) );

} else {
// figure out which field contains the probable house number, prefer house_number
// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street
// so because we're treating the entire field as a street address, it's safe
// to assume that an identified postcode is actually a house number.
const house_number_field = findHouseNumberField(response);

// if we're fairly certain that libpostal identified a house number
// (from either the house_number or postcode field), place it into the
// number field and remove the first instance of that value from address
// and assign to street
// eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
if (house_number_field) {
req.clean.parsed_text.number = house_number_field.value;

// remove the first instance of the number and trim whitespace
req.clean.parsed_text.street = _.trim(_.replace(req.clean.parsed_text.address, req.clean.parsed_text.number, ''));

} else {
// otherwise no house number was identifiable, so treat the entire input
// as a street
req.clean.parsed_text.street = req.clean.parsed_text.address;

}

// the address field no longer means anything since it's been parsed, so remove it
delete req.clean.parsed_text.address;

debugLog.push(req, {parsed_text: response});

}

debugLog.stopTimer(req, initialTime);
return next();

});

}

return controller;
}

module.exports = setup;
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
"pelias-model": "5.2.0",
"pelias-query": "9.1.1",
"pelias-sorting": "1.1.0",
"pelias-text-analyzer": "1.10.2",
"predicates": "^2.0.0",
"retry": "^0.10.1",
"stats-lite": "^2.0.4",
Expand Down
23 changes: 22 additions & 1 deletion routes/v1.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var controllers = {
coarse_reverse: require('../controller/coarse_reverse'),
mdToHTML: require('../controller/markdownToHtml'),
libpostal: require('../controller/libpostal'),
structured_libpostal: require('../controller/structured_libpostal'),
place: require('../controller/place'),
placeholder: require('../controller/placeholder'),
search: require('../controller/search'),
Expand Down Expand Up @@ -96,6 +97,7 @@ const PlaceHolder = require('../service/configurations/PlaceHolder');
const PointInPolygon = require('../service/configurations/PointInPolygon');
const Language = require('../service/configurations/Language');
const Interpolation = require('../service/configurations/Interpolation');
const Libpostal = require('../service/configurations/Libpostal');

/**
* Append routes to app
Expand All @@ -122,6 +124,18 @@ function addRoutes(app, peliasConfig) {
const interpolationService = serviceWrapper(interpolationConfiguration);
const isInterpolationEnabled = _.constant(interpolationConfiguration.isEnabled());

// standard libpostal should use req.clean.text for the `address` parameter
const libpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.text'));
const libpostalService = serviceWrapper(libpostalConfiguration);

// structured libpostal should use req.clean.parsed_text.address for the `address` parameter
const structuredLibpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.parsed_text.address'));
const structuredLibpostalService = serviceWrapper(structuredLibpostalConfiguration);

// fallback to coarse reverse when regular reverse didn't return anything
const coarseReverseShouldExecute = all(
isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData)
Expand All @@ -132,6 +146,12 @@ function addRoutes(app, peliasConfig) {
not(isRequestSourcesOnlyWhosOnFirst)
);

// for libpostal to execute for structured requests, req.clean.parsed_text.address must exist
const structuredLibpostalShouldExecute = all(
not(hasRequestErrors),
hasParsedTextProperties.all('address')
);

// execute placeholder if libpostal only parsed as admin-only and needs to
// be geodisambiguated
const placeholderGeodisambiguationShouldExecute = all(
Expand Down Expand Up @@ -256,7 +276,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.search.middleware(peliasConfig.api),
middleware.requestLanguage,
middleware.calcSize(),
controllers.libpostal(libpostalShouldExecute),
controllers.libpostal(libpostalService, libpostalShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute),
controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute),
Expand Down Expand Up @@ -286,6 +306,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.structured_geocoding.middleware(peliasConfig.api),
middleware.requestLanguage,
middleware.calcSize(),
controllers.structured_libpostal(structuredLibpostalService, structuredLibpostalShouldExecute),
controllers.search(peliasConfig.api, esclient, queries.structured_geocoding, not(hasResponseDataOrRequestErrors)),
postProc.trimByGranularityStructured(),
postProc.distances('focus.point.'),
Expand Down
44 changes: 1 addition & 43 deletions sanitizer/_synthesize_analysis.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
const _ = require('lodash');
const text_analyzer = require('pelias-text-analyzer');

const fields = {
'venue': 'query',
Expand All @@ -17,20 +16,6 @@ function normalizeWhitespaceToSingleSpace(val) {
return _.replace(_.trim(val), /\s+/g, ' ');
}

function isPostalCodeOnly(parsed_text) {
return Object.keys(parsed_text).length === 1 &&
parsed_text.hasOwnProperty('postalcode');
}

// figure out which field contains the probable house number, prefer number
// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street
// so because we're treating the entire field as a street address, it's safe
// to assume that an identified postcode is actually a house number.
function getHouseNumberField(analyzed_address) {
// return the first field available in the libpostal response, undefined if none
return _.find(['number', 'postalcode'], _.partial(_.has, analyzed_address));
}

function _sanitize( raw, clean ){

// error & warning messages
Expand All @@ -51,35 +36,8 @@ function _sanitize( raw, clean ){
`at least one of the following fields is required: ${Object.keys(fields).join(', ')}`);
}

if (clean.parsed_text.hasOwnProperty('address')) {
const analyzed_address = text_analyzer.parse(clean.parsed_text.address);

const house_number_field = getHouseNumberField(analyzed_address);

// if we're fairly certain that libpostal identified a house number
// (from either the house_number or postcode field), place it into the
// number field and remove the first instance of that value from address
// and assign to street
// eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
if (house_number_field) {
clean.parsed_text.number = analyzed_address[house_number_field];

// remove the first instance of the number and trim whitespace
clean.parsed_text.street = _.trim(_.replace(clean.parsed_text.address, clean.parsed_text.number, ''));

} else {
// otherwise no house number was identifiable, so treat the entire input
// as a street
clean.parsed_text.street = clean.parsed_text.address;

}

// the address field no longer means anything since it's been parsed, so remove it
delete clean.parsed_text.address;

}

return messages;

}

function _expected() {
Expand Down
5 changes: 5 additions & 0 deletions schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ module.exports = Joi.object().keys({
url: Joi.string().uri({ scheme: /https?/ }),
timeout: Joi.number().integer().optional().default(250).min(0),
retries: Joi.number().integer().optional().default(3).min(0),
}).unknown(false).requiredKeys('url'),
libpostal: Joi.object().keys({
url: Joi.string().uri({ scheme: /https?/ }),
timeout: Joi.number().integer().optional().default(250).min(0),
retries: Joi.number().integer().optional().default(3).min(0),
}).unknown(false).requiredKeys('url')
}).unknown(false).default({}), // default api.services to an empty object
defaultParameters: Joi.object().keys({
Expand Down
Loading

0 comments on commit 0cc1eed

Please sign in to comment.