From 4604d1faca262bf970cd44550f7c96117a6829f9 Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Mon, 9 Sep 2024 01:58:14 +0000 Subject: [PATCH 1/5] Further performance improvements for FP math More FDIV->FMUL opportunities unlocked, following in the spirit of https://github.com/uber/h3/pull/852 --- src/h3lib/include/constants.h | 3 +++ src/h3lib/lib/bbox.c | 10 +++++----- src/h3lib/lib/coordijk.c | 16 ++++++++-------- src/h3lib/lib/faceijk.c | 4 ++-- src/h3lib/lib/latLng.c | 14 +++++++------- src/h3lib/lib/localij.c | 7 ++++--- 6 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/h3lib/include/constants.h b/src/h3lib/include/constants.h index fe84842a8..90b7036aa 100644 --- a/src/h3lib/include/constants.h +++ b/src/h3lib/include/constants.h @@ -50,6 +50,9 @@ /** one third **/ #define M_ONETHIRD 0.333333333333333333333333333333333333333 +/** one seventh (1/7) **/ +#define M_ONESEVENTH 0.14285714285714285714285714285714285 + /** rotation angle between Class II and Class III resolution axes * (asin(sqrt(3.0 / 28.0))) */ #define M_AP7_ROT_RADS 0.333473172251832115336090755351601070065900389 diff --git a/src/h3lib/lib/bbox.c b/src/h3lib/lib/bbox.c index 70d56ffe2..2c0bb87bd 100644 --- a/src/h3lib/lib/bbox.c +++ b/src/h3lib/lib/bbox.c @@ -57,10 +57,10 @@ bool bboxIsTransmeridian(const BBox *bbox) { return bbox->east < bbox->west; } * @param center Output center coordinate */ void bboxCenter(const BBox *bbox, LatLng *center) { - center->lat = (bbox->north + bbox->south) / 2.0; + center->lat = (bbox->north + bbox->south) * 0.5; // If the bbox crosses the antimeridian, shift east 360 degrees double east = bboxIsTransmeridian(bbox) ? bbox->east + M_2PI : bbox->east; - center->lng = constrainLng((east + bbox->west) / 2.0); + center->lng = constrainLng((east + bbox->west) * 0.5); } /** @@ -265,8 +265,8 @@ H3Error lineHexEstimate(const LatLng *origin, const LatLng *destination, void scaleBBox(BBox *bbox, double scale) { double width = bboxWidthRads(bbox); double height = bboxHeightRads(bbox); - double widthBuffer = (width * scale - width) / 2; - double heightBuffer = (height * scale - height) / 2; + double widthBuffer = (width * scale - width) * 0.5; + double heightBuffer = (height * scale - height) * 0.5; // Scale north and south, clamping to latitude domain bbox->north += heightBuffer; if (bbox->north > M_PI_2) bbox->north = M_PI_2; @@ -308,4 +308,4 @@ void bboxNormalization(const BBox *a, const BBox *b, : aIsTransmeridian ? NORMALIZE_EAST : aToBTrendsEast ? NORMALIZE_WEST : NORMALIZE_EAST; -} \ No newline at end of file +} diff --git a/src/h3lib/lib/coordijk.c b/src/h3lib/lib/coordijk.c index 7f9ff7cb5..0bed0250b 100644 --- a/src/h3lib/lib/coordijk.c +++ b/src/h3lib/lib/coordijk.c @@ -345,8 +345,8 @@ H3Error _upAp7Checked(CoordIJK *ijk) { } } - ijk->i = (int)lround(((i * 3) - j) / 7.0); - ijk->j = (int)lround((i + (j * 2)) / 7.0); + ijk->i = (int)lround(((i * 3) - j) * M_ONESEVENTH); + ijk->j = (int)lround((i + (j * 2)) * M_ONESEVENTH); ijk->k = 0; // Expected not to be reachable, because max + min or max - min would need @@ -393,8 +393,8 @@ H3Error _upAp7rChecked(CoordIJK *ijk) { } } - ijk->i = (int)lround(((i * 2) + j) / 7.0); - ijk->j = (int)lround(((j * 3) - i) / 7.0); + ijk->i = (int)lround(((i * 2) + j) * M_ONESEVENTH); + ijk->j = (int)lround(((j * 3) - i) * M_ONESEVENTH); ijk->k = 0; // Expected not to be reachable, because max + min or max - min would need @@ -417,8 +417,8 @@ void _upAp7(CoordIJK *ijk) { int i = ijk->i - ijk->k; int j = ijk->j - ijk->k; - ijk->i = (int)lround((3 * i - j) / 7.0); - ijk->j = (int)lround((i + 2 * j) / 7.0); + ijk->i = (int)lround((3 * i - j) * M_ONESEVENTH); + ijk->j = (int)lround((i + 2 * j) * M_ONESEVENTH); ijk->k = 0; _ijkNormalize(ijk); } @@ -434,8 +434,8 @@ void _upAp7r(CoordIJK *ijk) { int i = ijk->i - ijk->k; int j = ijk->j - ijk->k; - ijk->i = (int)lround((2 * i + j) / 7.0); - ijk->j = (int)lround((3 * j - i) / 7.0); + ijk->i = (int)lround((2 * i + j) * M_ONESEVENTH); + ijk->j = (int)lround((3 * j - i) * M_ONESEVENTH); ijk->k = 0; _ijkNormalize(ijk); } diff --git a/src/h3lib/lib/faceijk.c b/src/h3lib/lib/faceijk.c index 133b5f4bc..a408bf0f3 100644 --- a/src/h3lib/lib/faceijk.c +++ b/src/h3lib/lib/faceijk.c @@ -393,7 +393,7 @@ void _geoToHex2d(const LatLng *g, int res, int *face, Vec2d *v) { _geoToClosestFace(g, face, &sqd); // cos(r) = 1 - 2 * sin^2(r/2) = 1 - 2 * (sqd / 4) = 1 - sqd/2 - double r = acos(1 - sqd / 2); + double r = acos(1 - sqd * 0.5); if (r < EPSILON) { v->x = v->y = 0.0; @@ -413,7 +413,7 @@ void _geoToHex2d(const LatLng *g, int res, int *face, Vec2d *v) { r = tan(r); // scale for current resolution length u - r /= RES0_U_GNOMONIC; + r *= INV_RES0_U_GNOMONIC; for (int i = 0; i < res; i++) r *= M_SQRT7; // we now have (r, theta) in hex2d with theta ccw from x-axes diff --git a/src/h3lib/lib/latLng.c b/src/h3lib/lib/latLng.c index 8d8e8cad2..eb3b11730 100644 --- a/src/h3lib/lib/latLng.c +++ b/src/h3lib/lib/latLng.c @@ -169,8 +169,8 @@ double normalizeLng(const double lng, * @return the great circle distance in radians between a and b */ double H3_EXPORT(greatCircleDistanceRads)(const LatLng *a, const LatLng *b) { - double sinLat = sin((b->lat - a->lat) / 2.0); - double sinLng = sin((b->lng - a->lng) / 2.0); + double sinLat = sin((b->lat - a->lat) * 0.5); + double sinLng = sin((b->lng - a->lng) * 0.5); double A = sinLat * sinLat + cos(a->lat) * cos(b->lat) * sinLng * sinLng; @@ -355,12 +355,12 @@ H3Error H3_EXPORT(getNumCells)(int res, int64_t *out) { * @return area in radians^2 of triangle on unit sphere */ double triangleEdgeLengthsToArea(double a, double b, double c) { - double s = (a + b + c) / 2; + double s = (a + b + c) * 0.5; - a = (s - a) / 2; - b = (s - b) / 2; - c = (s - c) / 2; - s = s / 2; + a = (s - a) * 0.5; + b = (s - b) * 0.5; + c = (s - c) * 0.5; + s = s * 0.5; return 4 * atan(sqrt(tan(s) * tan(a) * tan(b) * tan(c))); } diff --git a/src/h3lib/lib/localij.c b/src/h3lib/lib/localij.c index cac65b8c3..bf0388b50 100644 --- a/src/h3lib/lib/localij.c +++ b/src/h3lib/lib/localij.c @@ -702,12 +702,13 @@ H3Error H3_EXPORT(gridPathCells)(H3Index start, H3Index end, H3Index *out) { ijkToCube(&startIjk); ijkToCube(&endIjk); + double invDistance = distance ? 1.0 / (double)distance : 0; double iStep = - distance ? (double)(endIjk.i - startIjk.i) / (double)distance : 0; + distance ? (double)(endIjk.i - startIjk.i) * invDistance : 0; double jStep = - distance ? (double)(endIjk.j - startIjk.j) / (double)distance : 0; + distance ? (double)(endIjk.j - startIjk.j) * invDistance : 0; double kStep = - distance ? (double)(endIjk.k - startIjk.k) / (double)distance : 0; + distance ? (double)(endIjk.k - startIjk.k) * invDistance : 0; CoordIJK currentIjk = {startIjk.i, startIjk.j, startIjk.k}; for (int64_t n = 0; n <= distance; n++) { From 291ada932997f6f3c5ece78902a65d9b2b62f799 Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Mon, 9 Sep 2024 02:53:51 +0000 Subject: [PATCH 2/5] Formatting fix --- src/h3lib/lib/localij.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/h3lib/lib/localij.c b/src/h3lib/lib/localij.c index bf0388b50..5b5ec6f33 100644 --- a/src/h3lib/lib/localij.c +++ b/src/h3lib/lib/localij.c @@ -703,12 +703,9 @@ H3Error H3_EXPORT(gridPathCells)(H3Index start, H3Index end, H3Index *out) { ijkToCube(&endIjk); double invDistance = distance ? 1.0 / (double)distance : 0; - double iStep = - distance ? (double)(endIjk.i - startIjk.i) * invDistance : 0; - double jStep = - distance ? (double)(endIjk.j - startIjk.j) * invDistance : 0; - double kStep = - distance ? (double)(endIjk.k - startIjk.k) * invDistance : 0; + double iStep = distance ? (double)(endIjk.i - startIjk.i) * invDistance : 0; + double jStep = distance ? (double)(endIjk.j - startIjk.j) * invDistance : 0; + double kStep = distance ? (double)(endIjk.k - startIjk.k) * invDistance : 0; CoordIJK currentIjk = {startIjk.i, startIjk.j, startIjk.k}; for (int64_t n = 0; n <= distance; n++) { From dd45fbe21ed0731ec48314ac3724327d69a51775 Mon Sep 17 00:00:00 2001 From: Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> Date: Tue, 10 Sep 2024 16:57:02 -0700 Subject: [PATCH 3/5] Update src/h3lib/lib/localij.c Co-authored-by: Nick Rabinowitz --- src/h3lib/lib/localij.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/h3lib/lib/localij.c b/src/h3lib/lib/localij.c index 5b5ec6f33..e9444ab09 100644 --- a/src/h3lib/lib/localij.c +++ b/src/h3lib/lib/localij.c @@ -703,9 +703,9 @@ H3Error H3_EXPORT(gridPathCells)(H3Index start, H3Index end, H3Index *out) { ijkToCube(&endIjk); double invDistance = distance ? 1.0 / (double)distance : 0; - double iStep = distance ? (double)(endIjk.i - startIjk.i) * invDistance : 0; - double jStep = distance ? (double)(endIjk.j - startIjk.j) * invDistance : 0; - double kStep = distance ? (double)(endIjk.k - startIjk.k) * invDistance : 0; + double iStep = (double)(endIjk.i - startIjk.i) * invDistance; + double jStep = (double)(endIjk.j - startIjk.j) * invDistance; + double kStep = (double)(endIjk.k - startIjk.k) * invDistance; CoordIJK currentIjk = {startIjk.i, startIjk.j, startIjk.k}; for (int64_t n = 0; n <= distance; n++) { From 607458864e6b29f2c8efc4089c699cd9b80a1550 Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Thu, 19 Sep 2024 02:01:57 +0000 Subject: [PATCH 4/5] Add #905 to CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 495a030b5..d29dd51d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ The public API of this library consists of the functions declared in file ### Changed - Replace internal algorithm for `polygonToCells` with a new version that is more memory-efficient (#785) - Reorganize tests into public / internal. (#762) -- Performance enhancement for aarch64, should not affect other platforms (#790, #792, #852) +- Performance enhancement for aarch64, may improve other platforms (#790, #792, #852, #905) - `clang-format` upgraded to version 14. (#834) - Fixed tests that incorrectly did not test resolution 15. (#820) - Use `CMAKE_INSTALL_LIBDIR` when choosing where to install library files. (#819) From b7542c3b4bbbda6eb5abcae768dd19c6ee802a9c Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Thu, 19 Sep 2024 02:17:45 +0000 Subject: [PATCH 5/5] Save one fdiv and maybe a cosine --- src/h3lib/lib/latLng.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/h3lib/lib/latLng.c b/src/h3lib/lib/latLng.c index eb3b11730..801dc9af8 100644 --- a/src/h3lib/lib/latLng.c +++ b/src/h3lib/lib/latLng.c @@ -258,9 +258,10 @@ void _geoAzDistanceRads(const LatLng *p1, double az, double distance, p2->lat = -M_PI_2; p2->lng = 0.0; } else { - sinlng = sin(az) * sin(distance) / cos(p2->lat); + double invcosp2lat = 1.0 / cos(p2->lat); + sinlng = sin(az) * sin(distance) * invcosp2lat; coslng = (cos(distance) - sin(p1->lat) * sin(p2->lat)) / - cos(p1->lat) / cos(p2->lat); + cos(p1->lat) * invcosp2lat; if (sinlng > 1.0) sinlng = 1.0; if (sinlng < -1.0) sinlng = -1.0; if (coslng > 1.0) coslng = 1.0;