Skip to content

Commit

Permalink
feat: improve precision
Browse files Browse the repository at this point in the history
first use rtree and afterwards check if the coordinates match a polygon
  • Loading branch information
noandrea committed Jun 25, 2024
1 parent 1ae087f commit a194645
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 1,073 deletions.
90 changes: 63 additions & 27 deletions db/rtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import (
)

// Geo2TzRTreeIndex is a timezone lookup index backed by two R-trees of
// bounding boxes: one for land zones and one for sea (ocean) zones.
type Geo2TzRTreeIndex struct {
// land holds the entries whose name is not an ocean zone (see IsOcean);
// sea holds the ocean entries. Insert decides which tree an entry goes to.
land rtree.RTreeG[string]
sea rtree.RTreeG[string]
land rtree.RTreeG[timezoneGeo]
sea rtree.RTreeG[timezoneGeo]
// size counts the bounding boxes added through Insert (land and sea together).
size int
}

Expand All @@ -22,13 +22,13 @@ func IsOcean(label string) bool {
}

// Insert adds a new timezone bounding box to the index.
// min and max are the two opposite corners of the box.
// Entries for which IsOcean reports true are stored in the sea tree,
// every other entry is stored in the land tree.
func (g *Geo2TzRTreeIndex) Insert(min, max [2]float64, label string) {
func (g *Geo2TzRTreeIndex) Insert(min, max [2]float64, element timezoneGeo) {
// every inserted box is counted, regardless of the tree it ends up in
g.size++
if IsOcean(label) {
g.sea.Insert(min, max, label)
if IsOcean(element.Name) {
g.sea.Insert(min, max, element)
return
}
g.land.Insert(min, max, label)
g.land.Insert(min, max, element)
}

func NewGeo2TzRTreeIndexFromGeoJSON(geoJSONPath string) (*Geo2TzRTreeIndex, error) {
Expand All @@ -45,22 +45,7 @@ func NewGeo2TzRTreeIndexFromGeoJSON(geoJSONPath string) (*Geo2TzRTreeIndex, erro
// this function will add the timezone polygons to the shape index
iter := func(tz timezoneGeo) error {
for _, p := range tz.Polygons {
minLat, minLng, maxLat, maxLng := p.Vertices[0].lat, p.Vertices[0].lng, p.Vertices[0].lat, p.Vertices[0].lng
for _, v := range p.Vertices {
if v.lng < minLng {
minLng = v.lng
}
if v.lng > maxLng {
maxLng = v.lng
}
if v.lat < minLat {
minLat = v.lat
}
if v.lat > maxLat {
maxLat = v.lat
}
}
gri.Insert([2]float64{minLat, minLng}, [2]float64{maxLat, maxLng}, tz.Name)
gri.Insert([2]float64{p.MinLat, p.MinLng}, [2]float64{p.MaxLat, p.MaxLng}, tz)
}
return nil
}
Expand All @@ -80,22 +65,33 @@ func NewGeo2TzRTreeIndexFromGeoJSON(geoJSONPath string) (*Geo2TzRTreeIndex, erro
// if the timezone is not found, it returns an error
// It first searches in the land index, if not found, it searches in the sea index
func (g *Geo2TzRTreeIndex) Lookup(lat, lng float64) (tzID string, err error) {

// search the land index
g.land.Search(
[2]float64{lat, lng},
[2]float64{lat, lng},
func(min, max [2]float64, label string) bool {
tzID = label
func(min, max [2]float64, data timezoneGeo) bool {
for _, p := range data.Polygons {
if isPointInPolygonPIP(vertex{lat, lng}, p) {
tzID = data.Name
return false
}
}
return true
},
)

if tzID == "" {
// if not found, search the sea index
g.sea.Search(
[2]float64{lat, lng},
[2]float64{lat, lng},
func(min, max [2]float64, label string) bool {
tzID = label
func(min, max [2]float64, data timezoneGeo) bool {
for _, p := range data.Polygons {
if isPointInPolygonPIP(vertex{lat, lng}, p) {
tzID = data.Name
return false
}
}
return true
},
)
Expand All @@ -111,6 +107,23 @@ func (g *Geo2TzRTreeIndex) Size() int {
return g.size
}

// isPointInPolygonPIP reports whether point lies inside polygon or on its
// boundary.
//
// The polygon is treated as a closed ring: the last vertex is connected back
// to the first one. Points that sit exactly on an edge or on a vertex are
// reported as inside, so a coordinate on a border always matches. Every other
// point is classified with the even-odd crossing rule. A polygon without
// vertices contains no point.
func isPointInPolygonPIP(point vertex, polygon polygon) bool {
	inside := false
	n := len(polygon.Vertices)
	for i := 0; i < n; i++ {
		vi := polygon.Vertices[i]
		vj := polygon.Vertices[(i+1)%n]

		// Boundary check: the point is on the edge when it is collinear with
		// both endpoints (zero cross product) and within the edge's extent.
		// Returning here, instead of toggling the parity, keeps the answer
		// independent of how many edges meet at the point.
		cross := (vj.lng-vi.lng)*(point.lat-vi.lat) - (vj.lat-vi.lat)*(point.lng-vi.lng)
		if cross == 0 &&
			point.lat >= min(vi.lat, vj.lat) && point.lat <= max(vi.lat, vj.lat) &&
			point.lng >= min(vi.lng, vj.lng) && point.lng <= max(vi.lng, vj.lng) {
			return true
		}

		// Crossing rule: count the edge when it spans the point's latitude
		// (half-open interval, so a vertex is never counted twice) and crosses
		// that latitude at a longitude greater than the point's. The span
		// check guarantees vi.lat != vj.lat, so the division is well defined.
		spansLat := (vi.lat < point.lat && point.lat <= vj.lat) ||
			(vj.lat < point.lat && point.lat <= vi.lat)
		if spansLat && point.lng < (vj.lng-vi.lng)*(point.lat-vi.lat)/(vj.lat-vi.lat)+vi.lng {
			inside = !inside
		}
	}
	return inside
}

/*
GeoJSON processing
*/
Expand All @@ -119,6 +132,10 @@ GeoJSON processing
// with a list of vertices [lat, lng]
type polygon struct {
	// Vertices are the points of the polygon, in the order they were added.
	Vertices []vertex
	// Bounding box of the vertices; AddVertex keeps it up to date.
	MaxLat, MinLat, MaxLng, MinLng float64
}

type vertex struct {
Expand All @@ -137,6 +154,25 @@ type GeoJSONFeature struct {
}

// AddVertex appends the point (lat, lng) to the polygon and grows the
// polygon's bounding box so that it still encloses every vertex.
func (p *polygon) AddVertex(lat, lng float64) {
	isFirst := len(p.Vertices) == 0
	p.Vertices = append(p.Vertices, vertex{lat, lng})

	// The first vertex defines the initial bounding box; comparing it against
	// the zero-valued bounds would wrongly pin the box to the origin.
	if isFirst {
		p.MaxLat, p.MinLat = lat, lat
		p.MaxLng, p.MinLng = lng, lng
		return
	}

	if p.MaxLat < lat {
		p.MaxLat = lat
	}
	if p.MinLat > lat {
		p.MinLat = lat
	}
	if p.MaxLng < lng {
		p.MaxLng = lng
	}
	if p.MinLng > lng {
		p.MinLng = lng
	}
}

Expand Down
69 changes: 33 additions & 36 deletions db/rtree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,14 @@ func TestGeo2TzTreeIndex_LookupZone(t *testing.T) {
Tz string `json:"tz"`
Lat float64 `json:"lat"`
Lon float64 `json:"lon"`
HasError bool `json:"err,omitempty"`
NotFound bool `json:"not_found,omitempty"`
}

// load the database
gsi, err := NewGeo2TzRTreeIndexFromGeoJSON("../tzdata/timezones.zip")
assert.NoError(t, err)
assert.NotEmpty(t, gsi.Size())

// load the timezone references
var tzZones map[string]struct {
Zone string `json:"zone"`
UtcOffset float32 `json:"utc_offset_h"`
Dst struct {
Start string `json:"start"`
End string `json:"end"`
Zone string `json:"zone"`
UtcOffset float32 `json:"utc_offset_h"`
} `json:"dst,omitempty"`
}
err = helpers.LoadJSON("testdata/zones.json", &tzZones)
assert.NoError(t, err)
assert.NotEmpty(t, tzZones)

// load the coordinates
err = helpers.LoadJSON("testdata/coordinates.json", &tests)
assert.NoError(t, err)
Expand All @@ -45,31 +30,43 @@ func TestGeo2TzTreeIndex_LookupZone(t *testing.T) {
for _, tt := range tests {
t.Run(tt.Tz, func(t *testing.T) {
got, err := gsi.Lookup(tt.Lat, tt.Lon)
assert.NoError(t, err)

if tt.HasError {
t.Skip("skipping test as it is expected to fail (know error)")
}

// for oceans do exact match
if IsOcean(got) {
assert.Equal(t, tt.Tz, got, "expected %s to be %s for https://www.google.com/maps/@%v,%v,12z", tt.Tz, got, tt.Lat, tt.Lon)
if tt.NotFound {
assert.ErrorIs(t, err, ErrNotFound)
return
}
assert.NoError(t, err)
assert.Equal(t, got, tt.Tz, "expected %s to be %s for https://www.google.com/maps/@%v,%v,12z", tt.Tz, got, tt.Lat, tt.Lon)
})
}
}

// get the zone for the expected timezone
zoneExpected, ok := tzZones[tt.Tz]
assert.True(t, ok, "timezone %s not found in zones.json", tt.Tz)
// BenchmarkGeo2TzTreeIndex_LookupZone benchmarks the lookup function: it
// builds the index from ../tzdata/timezones.zip, loads the coordinates from
// testdata/coordinates.json and runs Lookup on them b.N times.
// NOTE(review): the statements below that use t, tzZones, zoneExpected and
// zoneGot reference names that are not declared in this function; they look
// like leftovers of the removed test body shown by the diff — confirm.
func BenchmarkGeo2TzTreeIndex_LookupZone(b *testing.B) {
// load the database
gsi, err := NewGeo2TzRTreeIndexFromGeoJSON("../tzdata/timezones.zip")
assert.NoError(b, err)
assert.NotEmpty(b, gsi.Size())

// get the reference timezone for the expected timezone
zoneGot, ok := tzZones[got]
assert.True(t, ok, "timezone %s not found in zones.json", got)
// load the coordinates
var tests []struct {
Tz string `json:"tz"`
Lat float64 `json:"lat"`
Lon float64 `json:"lon"`
NotFound bool `json:"not_found,omitempty"`
}
err = helpers.LoadJSON("testdata/coordinates.json", &tests)
assert.NoError(b, err)
assert.NotEmpty(b, tests)

if !ok {
assert.Equal(t, zoneExpected.Zone, got, "expected %s (%s) to be %s (%s) for https://www.google.com/maps/@%v,%v,12z", tt.Tz, zoneExpected.Zone, got, zoneGot.Zone, tt.Lat, tt.Lon)
} else {
assert.Equal(t, zoneExpected.Zone, zoneGot.Zone, "expected %s (%s) to be %s (%s) for https://www.google.com/maps/@%v,%v,12z", tt.Tz, zoneExpected.Zone, got, zoneGot.Zone, tt.Lat, tt.Lon)
// exclude the index construction and fixture loading from the measurement
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, tt := range tests {
_, err := gsi.Lookup(tt.Lat, tt.Lon)
if tt.NotFound {
assert.ErrorIs(b, err, ErrNotFound)
// NOTE(review): this return leaves the benchmark function itself, so the
// first not_found entry skips every remaining coordinate and every
// remaining b.N iteration; `continue` was probably intended — confirm.
return
}
})
assert.NoError(b, err)
}
}
}
63 changes: 45 additions & 18 deletions db/testdata/coordinates.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
[
{
"lat": 50,
"lon": 40,
"tz": "Europe/Moscow",
"note": ""
},
{
"lat": 44.50144,
"lon": -88.0625889,
"tz": "America/Chicago"
},
{
"lat": 43.74206,
"lon": -87.73337,
"tz": "America/Chicago"
},
{
"lat": 90,
"lon": 0,
Expand All @@ -9,41 +25,52 @@
"lat": 43.42582,
"lon": 11.831443,
"tz": "Europe/Rome",
"note": "https://github.com/noandrea/geo2tz/issues/22"
"note": "https://github.com/noandrea/geo2tz/issues/22",
"not_found": true
},
{ "lat": 32.7767, "lon": -96.797, "tz": "America/Chicago" },
{ "lat": 34.0522, "lon": -118.2437, "tz": "America/Los_Angeles" },
{ "lat": 40.7128, "lon": -74.006, "tz": "America/New_York" },
{ "lat": 51.5074, "lon": -0.1278, "tz": "Europe/London" },
{ "lat": 35.6895, "lon": 139.6917, "tz": "Asia/Tokyo" },
{ "lat": 48.8566, "lon": 2.3522, "tz": "Europe/Paris" },
{ "lat": -33.8688, "lon": 151.2093, "tz": "Australia/Sydney" },
{
"lat": -33.8688,
"lon": 151.2093,
"tz": "Australia/Sydney"
},
{ "lat": 19.4326, "lon": -99.1332, "tz": "America/Mexico_City" },
{ "lat": 39.9042, "lon": 116.4074, "tz": "Asia/Shanghai" },
{ "lat": 28.6139, "lon": 77.209, "tz": "Asia/Kolkata", "err": true },
{ "lat": 28.6139, "lon": 77.209, "tz": "Asia/Kolkata" },
{ "lat": -23.5505, "lon": -46.6333, "tz": "America/Sao_Paulo" },
{ "lat": -34.6037, "lon": -58.3816, "tz": "America/Argentina/Buenos_Aires" },
{ "lat": -26.2041, "lon": 28.0473, "tz": "Africa/Johannesburg", "err": true },
{ "lat": -26.2041, "lon": 28.0473, "tz": "Africa/Johannesburg" },
{ "lat": 41.9028, "lon": 12.4964, "tz": "Europe/Rome" },
{ "lat": 37.7749, "lon": -122.4194, "tz": "America/Los_Angeles" },
{ "lat": 52.52, "lon": 13.405, "tz": "Europe/Berlin" },
{ "lat": 31.2304, "lon": 121.4737, "tz": "Asia/Shanghai" },
{ "lat": 22.3964, "lon": 114.1095, "tz": "Asia/Hong_Kong", "err": true },
{ "lat": 22.3964, "lon": 114.1095, "tz": "Asia/Hong_Kong" },
{ "lat": -1.2921, "lon": 36.8219, "tz": "Africa/Nairobi" },
{ "lat": 33.8688, "lon": 151.2093, "tz": "Australia/Sydney", "err": true },
{
"lat": 33.8688,
"lon": 151.2093,
"tz": "Australia/Sydney",
"not_found": true,
"note": "it's in the middle of the ocean"
},
{ "lat": 50.1109, "lon": 8.6821, "tz": "Europe/Berlin" },
{ "lat": 40.4168, "lon": -3.7038, "tz": "Europe/Madrid" },
{ "lat": 45.4642, "lon": 9.19, "tz": "Europe/Rome" },
{ "lat": 43.6532, "lon": -79.3832, "tz": "America/Toronto" },
{ "lat": 37.9838, "lon": 23.7275, "tz": "Europe/Athens" },
{ "lat": 1.3521, "lon": 103.8198, "tz": "Asia/Singapore", "err": true },
{ "lat": 1.3521, "lon": 103.8198, "tz": "Asia/Singapore" },
{ "lat": 19.076, "lon": 72.8777, "tz": "Asia/Kolkata" },
{ "lat": -33.9249, "lon": 18.4241, "tz": "Africa/Johannesburg" },
{ "lat": 40.7306, "lon": -73.9352, "tz": "America/New_York" },
{ "lat": 35.6762, "lon": 139.6503, "tz": "Asia/Tokyo" },
{ "lat": 34.0522, "lon": -118.244, "tz": "America/Los_Angeles" },
{ "lat": 55.6761, "lon": 12.5683, "tz": "Europe/Copenhagen" },
{ "lat": 25.276987, "lon": 55.296249, "tz": "Asia/Dubai", "err": true },
{ "lat": 25.276987, "lon": 55.296249, "tz": "Asia/Dubai" },
{ "lat": 52.3676, "lon": 4.9041, "tz": "Europe/Amsterdam" },
{ "lat": 41.0082, "lon": 28.9784, "tz": "Europe/Istanbul" },
{ "lat": 59.3293, "lon": 18.0686, "tz": "Europe/Stockholm" },
Expand All @@ -56,18 +83,18 @@
{ "lat": -22.9068, "lon": -43.1729, "tz": "America/Sao_Paulo" },
{ "lat": -34.9285, "lon": 138.6007, "tz": "Australia/Adelaide" },
{ "lat": 37.5665, "lon": 126.978, "tz": "Asia/Seoul" },
{ "lat": 13.7563, "lon": 100.5018, "tz": "Asia/Bangkok", "err": true },
{ "lat": 22.5726, "lon": 88.3639, "tz": "Asia/Kolkata", "err": true },
{ "lat": 13.7563, "lon": 100.5018, "tz": "Asia/Bangkok" },
{ "lat": 22.5726, "lon": 88.3639, "tz": "Asia/Kolkata" },
{ "lat": 37.7749, "lon": -122.4194, "tz": "America/Los_Angeles" },
{ "lat": 48.2082, "lon": 16.3738, "tz": "Europe/Vienna" },
{ "lat": 52.2297, "lon": 21.0122, "tz": "Europe/Warsaw" },
{ "lat": 50.4501, "lon": 30.5234, "tz": "Europe/Kyiv" },
{ "lat": 49.8397, "lon": 24.0297, "tz": "Europe/Kyiv", "err": true },
{ "lat": 49.8397, "lon": 24.0297, "tz": "Europe/Kyiv" },
{ "lat": 48.8566, "lon": 2.3522, "tz": "Europe/Paris" },
{ "lat": 34.6937, "lon": 135.5023, "tz": "Asia/Tokyo" },
{ "lat": 48.1351, "lon": 11.582, "tz": "Europe/Berlin" },
{ "lat": 40.4168, "lon": -3.7038, "tz": "Europe/Madrid" },
{ "lat": 1.3521, "lon": 103.8198, "tz": "Asia/Singapore", "err": true },
{ "lat": 1.3521, "lon": 103.8198, "tz": "Asia/Singapore" },
{ "lat": 50.0755, "lon": 14.4378, "tz": "Europe/Prague" },
{ "lat": 52.52, "lon": 13.405, "tz": "Europe/Berlin" },
{ "lat": 31.2304, "lon": 121.4737, "tz": "Asia/Shanghai" },
Expand All @@ -79,18 +106,18 @@
{ "lat": 30.0444, "lon": 31.2357, "tz": "Africa/Cairo" },
{ "lat": -17.8249, "lon": 31.053, "tz": "Africa/Harare" },
{ "lat": 14.5995, "lon": 120.9842, "tz": "Asia/Manila" },
{ "lat": 31.7683, "lon": 35.2137, "tz": "Asia/Jerusalem", "err": true },
{ "lat": 31.7683, "lon": 35.2137, "tz": "Asia/Jerusalem" },
{ "lat": -22.9068, "lon": -43.1729, "tz": "America/Sao_Paulo" },
{ "lat": 12.9716, "lon": 77.5946, "tz": "Asia/Kolkata" },
{ "lat": -1.2921, "lon": 36.8219, "tz": "Africa/Nairobi" },
{ "lat": 41.9028, "lon": 12.4964, "tz": "Europe/Rome" },
{ "lat": 60.1695, "lon": 24.9354, "tz": "Europe/Helsinki" },
{ "lat": 45.4215, "lon": -75.6972, "tz": "America/Toronto" },
{ "lat": -25.2744, "lon": 133.7751, "tz": "Australia/Adelaide" },
{ "lat": -25.2744, "lon": 133.7751, "tz": "Australia/Darwin" },
{ "lat": -33.8688, "lon": 151.2093, "tz": "Australia/Sydney" },
{ "lat": 50.8503, "lon": 4.3517, "tz": "Europe/Brussels" },
{ "lat": 38.7223, "lon": -9.1393, "tz": "Europe/Lisbon" },
{ "lat": 1.29027, "lon": 103.851959, "tz": "Asia/Singapore", "err": true },
{ "lat": 1.29027, "lon": 103.851959, "tz": "Asia/Singapore" },
{ "lat": 35.6895, "lon": 139.6917, "tz": "Asia/Tokyo" },
{ "lat": 37.7749, "lon": -122.4194, "tz": "America/Los_Angeles" },
{ "lat": 48.8566, "lon": 2.3522, "tz": "Europe/Paris" },
Expand All @@ -100,18 +127,18 @@
{ "lat": 55.6761, "lon": 12.5683, "tz": "Europe/Copenhagen" },
{ "lat": 19.4326, "lon": -99.1332, "tz": "America/Mexico_City" },
{ "lat": 39.9042, "lon": 116.4074, "tz": "Asia/Shanghai" },
{ "lat": 28.6139, "lon": 77.209, "tz": "Asia/Kolkata", "err": true },
{ "lat": 28.6139, "lon": 77.209, "tz": "Asia/Kolkata" },
{ "lat": -23.5505, "lon": -46.6333, "tz": "America/Sao_Paulo" },
{ "lat": -34.6037, "lon": -58.3816, "tz": "America/Argentina/Buenos_Aires" },
{ "lat": -26.2041, "lon": 28.0473, "tz": "Africa/Johannesburg", "err": true },
{ "lat": -26.2041, "lon": 28.0473, "tz": "Africa/Johannesburg" },
{ "lat": 41.9028, "lon": 12.4964, "tz": "Europe/Rome" },
{ "lat": 37.7749, "lon": -122.4194, "tz": "America/Los_Angeles" },
{ "lat": 52.52, "lon": 13.405, "tz": "Europe/Berlin" },
{ "lat": 55.9533, "lon": -3.1883, "tz": "Europe/London" },
{ "lat": 37.5665, "lon": 126.978, "tz": "Asia/Seoul" },
{ "lat": -34.6037, "lon": -58.3816, "tz": "America/Argentina/Buenos_Aires" },
{ "lat": -23.5505, "lon": -46.6333, "tz": "America/Sao_Paulo" },
{ "lat": 22.3964, "lon": 114.1095, "tz": "Asia/Hong_Kong", "err": true },
{ "lat": 22.3964, "lon": 114.1095, "tz": "Asia/Hong_Kong" },
{ "lat": 52.52, "lon": 13.405, "tz": "Europe/Berlin" },
{ "lat": 39.9042, "lon": 116.4074, "tz": "Asia/Shanghai" },
{ "lat": 48.8566, "lon": 2.3522, "tz": "Europe/Paris" },
Expand Down
Loading

0 comments on commit a194645

Please sign in to comment.