diff --git a/.gitignore b/.gitignore index fc06e7c..edc742f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,12 @@ # Created by https://www.toptal.com/developers/gitignore/api/c,c++,cmake,macos,linux,windows # Edit at https://www.toptal.com/developers/gitignore?templates=c,c++,cmake,macos,linux,windows +### Common ### +.vs/ +.vscode/ +out/ +build/ + ### C ### # Prerequisites *.d diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cc8b24..9ad0c58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,4 +60,12 @@ if(MATH_BUILD_TESTS) add_executable(TestMathMatrix tests/test-matrix.cpp) target_link_libraries(TestMathMatrix PUBLIC math-static) add_test(NAME TestMathMatrix COMMAND TestMathMatrix) + + add_executable(TestMathQuaternion tests/test-quaternion.cpp) + target_link_libraries(TestMathQuaternion PUBLIC math-static) + add_test(NAME TestMathQuaternion COMMAND TestMathQuaternion) + + add_executable(TestMathContainers tests/test-containers.cpp) + target_link_libraries(TestMathContainers PUBLIC math-static) + add_test(NAME TestMathContainers COMMAND TestMathContainers) endif() \ No newline at end of file diff --git a/include/math/aabb.hpp b/include/math/aabb.hpp index 543acd8..af93766 100644 --- a/include/math/aabb.hpp +++ b/include/math/aabb.hpp @@ -22,6 +22,8 @@ #include "math/ray.hpp" #include "math/plane.hpp" +// TODO: Port more Jolt optimized BBox functions. + namespace math { @@ -38,179 +40,194 @@ namespace math struct [[nodiscard]] Aabb { protected: - float3 min, max; + simd_f32_4 min, max; public: /** - * @brief Creates a new Axis Aligned Bounding Box. (AABB) + * @brief Creates a new Axis Aligned Bounding Box in 3D space. 
(AABB) * - * @param[in] min minimum bounding box corner position in the space - * @param[in] max maximum bounding box corner position in the space + * @param min minimum bounding box corner position in 3D space + * @param max maximum bounding box corner position in 3d space */ - constexpr Aabb(const float3& min = float3::zero, const float3& max = float3::zero) noexcept : min(min), max(max) + Aabb(simd_f32_4 min = simd_f32_4::zero, simd_f32_4 max = simd_f32_4::zero) noexcept : min(min), max(max) { assert(areAllTrue(min <= max)); } /** - * @brief Returns minimum AABB corner position in the space. + * @brief Returns minimum AABB corner position in 3D space. * @details The point that has the smallest values for all axes. */ - constexpr const float3& getMin() const noexcept { return min; } + const simd_f32_4& getMin() const noexcept { return min; } /** - * @brief Returns maximum AABB corner position in the space. + * @brief Returns maximum AABB corner position in 3D space. * @details The point that has the biggest values for all axes. */ - constexpr const float3& getMax() const noexcept { return max; } + const simd_f32_4& getMax() const noexcept { return max; } /** - * @brief Sets minimum AABB corner position in the space. + * @brief Sets minimum AABB corner position in 3D space. * @details See the @ref getMin(). - * @param[in] min minimum bounding box corner position in the space + * @param min minimum bounding box corner position in 3D space */ - void setMin(const float3& min) noexcept + void setMin(simd_f32_4 min) noexcept { assert(areAllTrue(min <= this->max)); this->min = min; } /** - * @brief Sets maximum AABB corner position in the space. + * @brief Sets maximum AABB corner position in 3D space. * @details See the @ref setMax(). 
- * @param[in] max maximum bounding box corner position in the space + * @param max maximum bounding box corner position in 3D space */ - void setMax(const float3& max) noexcept + void setMax(simd_f32_4 max) noexcept { assert(areAllTrue(max >= this->min)); this->max = max; } /** - * @brief Sets minimum and maximum AABB corner position in the space. + * @brief Sets minimum and maximum AABB corner position in 3D space. * @details See the @ref getMin() and @ref getMax(). * - * @param[in] min minimum bounding box corner position in the space - * @param[in] max maximum bounding box corner position in the space + * @param min minimum bounding box corner position in 3D space + * @param max maximum bounding box corner position in 3D space */ - void set(const float3& min, const float3& max) noexcept + void set(simd_f32_4 min, simd_f32_4 max) noexcept { assert(areAllTrue(min <= max)); - this->min = min; - this->max = max; + this->min = min; this->max = max; } /** - * @brief Sets size and position of the bounding box. + * @brief Sets size of the bounding box. + * @param size target size of the bounding box + */ + void setSize(simd_f32_4 size) noexcept + { + assert(areAllTrue(size >= 0.0f)); + auto extent = size * 0.5f; + min = -extent; max = extent; + } + /** + * @brief Sets size and position of the bounding box in 3D space. * - * @param[in] size target size of the bounding box - * @param[in] position target position of the bounding box in space + * @param size target size of the bounding box + * @param position target position of the bounding box in 3D space */ - void setSize(const float3& size, const float3& position = float3::zero) noexcept + void setSize(simd_f32_4 size, simd_f32_4 position) noexcept { assert(areAllTrue(size >= 0.0f)); auto extent = size * 0.5f; - min = position - extent; - max = position + extent; + min = position - extent; max = position + extent; } /** * @brief Returns size of the bounding box. 
*/ - constexpr float3 getSize() const noexcept { return max - min; } + simd_f32_4 getSize() const noexcept { return max - min; } /** - * @brief Returns position of the bounding box. + * @brief Returns position of the bounding box in 3D space. */ - constexpr float3 getPosition() const noexcept { return (min + max) * 0.5f; } + simd_f32_4 getPosition() const noexcept { return (min + max) * 0.5f; } /******************************************************************************************************************* - * @brief Sets extent and position of the bounding box. - * - * @param[in] extent target extent of the bounding box (half size) - * @param[in] position target position of the bounding box in space + * @brief Sets extent of the bounding box, centering it at the origin. (min = -extent, max = extent) + * @param extent target extent of the bounding box (half size) */ - void setExtent(const float3& extent, const float3& position = float3::zero) noexcept + void setExtent(simd_f32_4 extent) noexcept { assert(areAllTrue(extent >= 0.0f)); - min = position - extent; - max = position + extent; + min = -extent; max = extent; } - /** - * @brief Returns extent and position of the bounding box. + * @brief Sets extent and position of the bounding box in 3D space. * - * @param[out] extent target extent of the bounding box (half size) - * @param[out] position target position of the bounding box in space + * @param extent target extent of the bounding box (half size) + * @param position target position of the bounding box in 3D space */ - void getExtent(float3& extent, float3& position) const noexcept + void setExtent(simd_f32_4 extent, simd_f32_4 position) noexcept { - extent = (max - min) * 0.5f; - position = min + extent; + assert(areAllTrue(extent >= 0.0f)); + min = position - extent; max = position + extent; } + /** * @brief Returns extent of the bounding box. 
(half size) */ - constexpr float3 getExtent() const noexcept { return (max - min) * 0.5f; } + simd_f32_4 getExtent() const noexcept { return (max - min) * 0.5f; } + /** + * @brief Returns extent and position of the bounding box in 3D space. + * + * @param extent target extent of the bounding box (half size) + * @param position target position of the bounding box in 3D space + */ + void getExtent(simd_f32_4& extent, simd_f32_4& position) const noexcept + { + extent = (max - min) * 0.5f; position = min + extent; + } /** - * @brief Extends bounding box min and max corner positions. - * @param[in] point target point to include into the bounding box + * @brief Extends bounding box min and max corner positions in 3D space. + * @param point target point to include into the bounding box in 3D space */ - void extend(const float3& point) noexcept + void extend(simd_f32_4 point) noexcept { - min = math::min(min, point); - max = math::max(max, point); + min = math::min(min, point); max = math::max(max, point); } /** - * @brief Extends bounding box min and max corner positions. + * @brief Extends bounding box min and max corner positions in 3D space. * @param[in] aabb target AABB to include into the bounding box */ void extend(const Aabb& aabb) noexcept { - min = math::min(min, aabb.min); - max = math::max(max, aabb.max); + min = math::min(min, aabb.min); max = math::max(max, aabb.max); } /** - * @brief Calculates area of the AABB. + * @brief Translates bounding box in 3D space. + * @param translation target translation transformation in 3D space */ - constexpr float calcArea() const noexcept - { - auto extent = max - min; - return extent.x * extent.y + extent.y * extent.z + extent.z * extent.x; - } + void translate(simd_f32_4 translation) noexcept { min += translation; max += translation; } + /** + * @brief Scales bounding box in 3D space. 
+ * @param scale target scale transformation in 3D space + */ + void scale(simd_f32_4 scale) noexcept { min *= scale; max *= scale; } - constexpr Aabb operator*(const float3& v) const noexcept { return Aabb(min * v, max * v); } - constexpr Aabb operator/(const float3& v) const noexcept { return Aabb(min / v, max / v); } - constexpr Aabb operator+(const float3& v) const noexcept { return Aabb(min + v, max + v); } - constexpr Aabb operator-(const float3& v) const noexcept { return Aabb(min - v, max - v); } - Aabb& operator*=(const float3& v) noexcept { min *= v; max *= v; return *this; } - Aabb& operator/=(const float3& v) noexcept { min /= v; max /= v; return *this; } - Aabb& operator+=(const float3& v) noexcept { min += v; max += v; return *this; } - Aabb& operator-=(const float3& v) noexcept { min -= v; max -= v; return *this; } - constexpr bool operator==(const Aabb& v) const noexcept { return min == v.min && max == v.max; } - constexpr bool operator!=(const Aabb& v) const noexcept { return min != v.min || max != v.max; } - - constexpr bool operator<(const Aabb& v) const noexcept - { - return areAllTrue(min < v.min) && areAllTrue(max < v.max); - } - constexpr bool operator>(const Aabb& v) const noexcept - { - return areAllTrue(min > v.min) && areAllTrue(max > v.max); - } - constexpr bool operator<=(const Aabb& v) const noexcept + // TODO: update min max using rotation matrix + + /** + * @brief Calculates area of the bounding box. + */ + float calcArea() const noexcept { - return areAllTrue(min <= v.min) && areAllTrue(max <= v.max); + auto extent = max - min; + return 2.0f * (extent.getX() * extent.getY() + extent.getX() * extent.getZ() + extent.getY() * extent.getZ()); } - constexpr bool operator>=(const Aabb& v) const noexcept + /** + * @brief Calculates volume of the bounding box. 
+ */ + float calcVolume() const noexcept { - return areAllTrue(min >= v.min) && areAllTrue(max >= v.max); + auto extent = max - min; + return extent.getX() * extent.getY() * extent.getZ(); } + + Aabb operator*(simd_f32_4 v) const noexcept { return Aabb(min * v, max * v); } + Aabb operator/(simd_f32_4 v) const noexcept { return Aabb(min / v, max / v); } + Aabb operator+(simd_f32_4 v) const noexcept { return Aabb(min + v, max + v); } + Aabb operator-(simd_f32_4 v) const noexcept { return Aabb(min - v, max - v); } + Aabb& operator*=(simd_f32_4 v) noexcept { min *= v; max *= v; return *this; } + Aabb& operator/=(simd_f32_4 v) noexcept { min /= v; max /= v; return *this; } + Aabb& operator+=(simd_f32_4 v) noexcept { min += v; max += v; return *this; } + Aabb& operator-=(simd_f32_4 v) noexcept { min -= v; max -= v; return *this; } static const Aabb zero, one, two, half; }; -inline const Aabb Aabb::zero = Aabb(float3::zero, float3::zero); -inline const Aabb Aabb::one = Aabb(float3(-0.5f), float3(0.5f)); -inline const Aabb Aabb::two = Aabb(float3::minusOne, float3::one); -inline const Aabb Aabb::half = Aabb(float3(-0.25f), float3(0.25f)); +inline const Aabb Aabb::zero = Aabb(simd_f32_4::zero, simd_f32_4::zero); +inline const Aabb Aabb::one = Aabb(simd_f32_4(-0.5f), simd_f32_4(0.5f)); +inline const Aabb Aabb::two = Aabb(simd_f32_4::minusOne, simd_f32_4::one); +inline const Aabb Aabb::half = Aabb(simd_f32_4(-0.25f), simd_f32_4(0.25f)); /** * @brief Returns true if first AABB binary representation is less than the second. @@ -221,59 +238,113 @@ inline const Aabb Aabb::half = Aabb(float3(-0.25f), float3(0.25f)); -static bool isBinaryLess(const Aabb& a, const Aabb& b) noexcept { return memcmp(&a, &b, sizeof(float3) * 2) < 0; } +static bool isBinaryLess(const Aabb& a, const Aabb& b) noexcept { return memcmp(&a, &b, sizeof(Aabb)) < 0; } /*********************************************************************************************************************** - * @brief Returns true if point is inside the AABB. + * @brief Returns true if point is inside the AABB in 3D space. 
* * @param[in] aabb target AABB to check - * @param[in] point target point in the space + * @param point target point in the space + */ +static bool isInside(const Aabb& aabb, simd_f32_4 point) noexcept +{ + return areAllTrue(aabb.getMin() <= point & aabb.getMax() >= point); +} +/** + * @brief Returns true if other AABB is inside this AABB in 3D space. + * + * @param[in] aabb this AABB + * @param[in] other another AABB to check */ -static constexpr bool isInside(const Aabb& aabb, const float3& point) noexcept +static bool isInside(const Aabb& aabb, const Aabb& other) noexcept { - return areAllTrue(aabb.getMin() <= point) && areAllTrue(point <= aabb.getMax()); + return areAllTrue(aabb.getMin() <= other.getMin() & aabb.getMax() >= other.getMax()); } + /** - * @brief Returns closest point inside AABB to the specified one. + * @brief Returns closest point inside AABB to the specified one in 3D space. * * @param[in] aabb target AABB to use - * @param[in] point target point in the space + * @param point target point in the space */ -static constexpr float3 closestPoint(const Aabb& aabb, const float3& point) noexcept +static simd_f32_4 closestPoint(const Aabb& aabb, simd_f32_4 point) noexcept { return clamp(point, aabb.getMin(), aabb.getMax()); } /** - * @brief Calculates where ray intersects the AABB. - * @return Distance to the intersection points. + * @brief Calculates where ray intersects the AABB in 3D space. (Ray is inversed!) + * @return Distance to the intersection points, or FLT_MAX; -FLT_MAX if no hit. * - * @param[in] aabb target AABB to raycast - * @param[in] ray target ray in the space + * @param aabb target AABB to raycast + * @param ray target ray in the space (inversed!) 
*/ -float2 raycast2(const Aabb& aabb, const Ray& ray) noexcept; +static float2 raycast2I(Aabb aabb, Ray ray) noexcept +{ + auto t1 = (aabb.getMin() - ray.origin) * ray.getDirection(); + auto t2 = (aabb.getMax() - ray.origin) * ray.getDirection(); + auto tMin = select(ray.getParallel(), simd_f32_4::minusMax, min(t1, t2)); + auto tMax = select(ray.getParallel(), simd_f32_4::max, max(t1, t2)); + tMin = max(tMin, tMin.swizzle()); + tMin = max(tMin, tMin.swizzle()); + tMax = min(tMax, tMax.swizzle()); + tMax = min(tMax, tMax.swizzle()); + + auto noParallelOverlap = (ray.origin < aabb.getMin()) | (ray.origin > aabb.getMax()); + auto noIntersection = (tMin > tMax) | (tMax < simd_f32_4::zero) | (ray.getParallel() & noParallelOverlap); + noIntersection |= noIntersection.splatY(); + noIntersection |= noIntersection.splatZ(); + + return float2( + select(noIntersection, simd_f32_4::max, tMin).getX(), + select(noIntersection, simd_f32_4::minusMax, tMax).getX()); +} /** - * @brief Calculates where ray intersects the AABB. (Ray is inversed!) - * @return Distance to the intersection points. + * @brief Calculates where ray intersects the AABB in 3D space. + * @return Distance to the intersection points, or FLT_MAX; -FLT_MAX if no hit. * * @param[in] aabb target AABB to raycast - * @param[in] ray target ray in the space (inversed) + * @param ray target ray in the space */ -float2 raycast2I(const Aabb& aabb, const Ray& ray) noexcept; +static float2 raycast2(const Aabb& aabb, Ray ray) noexcept +{ + ray.setDirection(simd_f32_4::one / ray.getDirection()); + return raycast2I(aabb, ray); +} /** - * @brief Calculates where ray intersects the AABB. - * @return Distance to the first intersection point. + * @brief Calculates where ray intersects the AABB in 3D space. (Ray is inversed!) + * @return Distance to the first intersection point, or FLT_MAX if no hit. 
* * @param[in] aabb target AABB to raycast - * @param[in] ray target ray in the space + * @param[in] ray target ray in the space (inversed) */ -float raycast1(const Aabb& aabb, const Ray& ray) noexcept; +static float raycast1I(const Aabb& aabb, const Ray& ray) noexcept +{ + auto t1 = (aabb.getMin() - ray.origin) * ray.getDirection(); + auto t2 = (aabb.getMax() - ray.origin) * ray.getDirection(); + auto tMin = select(ray.getParallel(), simd_f32_4::minusMax, min(t1, t2)); + auto tMax = select(ray.getParallel(), simd_f32_4::max, max(t1, t2)); + tMin = max(tMin, tMin.swizzle()); + tMin = max(tMin, tMin.swizzle()); + tMax = min(tMax, tMax.swizzle()); + tMax = min(tMax, tMax.swizzle()); + + auto noParallelOverlap = (ray.origin < aabb.getMin()) | (ray.origin > aabb.getMax()); + auto noIntersection = (tMin > tMax) | (tMax < simd_f32_4::zero) | (ray.getParallel() & noParallelOverlap); + noIntersection |= noIntersection.splatY(); + noIntersection |= noIntersection.splatZ(); + return select(noIntersection, simd_f32_4::max, tMin).getX(); +} /** - * @brief Calculates where ray intersects the AABB. (Ray is inversed!) - * @return Distance to the first intersection point. + * @brief Calculates where ray intersects the AABB in 3D space. + * @return Distance to the first intersection point, or FLT_MAX if no hit. * * @param[in] aabb target AABB to raycast - * @param[in] ray target ray in the space (inversed) + * @param ray target ray in the space */ -float raycast1I(const Aabb& aabb, const Ray& ray) noexcept; +static float raycast1(const Aabb& aabb, Ray ray) noexcept +{ + ray.setDirection(simd_f32_4::one / ray.getDirection()); + return raycast1I(aabb, ray); +} /** * @brief Returns true if ray is intersecting the AABB. @@ -286,7 +357,7 @@ static constexpr bool isAabbIntersected(float2 raycastDists) noexcept } /** - * @brief Returns true if ray intersects the AABB. + * @brief Returns true if ray intersects the AABB in 3D space. 
* * @param[in] aabb target AABB to raycast * @param[in] ray target ray in the space @@ -314,18 +385,18 @@ static bool raycastI(const Aabb& aabb, const Ray& ray) noexcept */ static bool isIntersected(const Aabb& a, const Aabb& b) noexcept { - return areAllTrue(a.getMin() <= b.getMax()) && areAllTrue(b.getMin() <= a.getMax()); + return areAllTrue(a.getMin() <= b.getMax() & b.getMin() <= a.getMax()); } /** * @brief Returns true if triangle intersects the AABB. * @details Usefull for a fast 3D model voxelization. * - * @param[in] center target AABB center point in the space - * @param[in] extent target AABB extent (half size) + * @param position target AABB position in 3D space + * @param extent target AABB extent (half size) * @param[in] triangle target triangle in the space */ -bool isAabbIntersected(const float3& center, float3 extent, const Triangle& triangle) noexcept; +bool isAabbIntersected(simd_f32_4 position, simd_f32_4 extent, const Triangle& triangle) noexcept; /** * @brief Returns true if AABB is behind the frustum planes. 
@@ -339,9 +410,35 @@ bool isAabbIntersected(const float3& center, float3 extent, const Triangle& tria * @param[in] aabb target AABB to check * @param[in] model AABB transformation matrix * @param[in] planes target frustum planes - * @param[in] planeCount frustum plane count + * @param planeCount frustum plane count */ -bool isBehindFrustum(const Aabb& aabb, const float4x4& model, - const Plane* planes, uint8 planeCount = Plane::frustumCount) noexcept; +static bool isBehindFrustum(const Aabb& aabb, const simd_f32_4x4& model, + const Plane* planes, uint8 planeCount = Plane::frustumCount) noexcept +{ + auto min = aabb.getMin(), max = aabb.getMax(); + auto minX = min.getX(), minY = min.getY(), minZ = min.getZ(); + auto maxX = max.getX(), maxY = max.getY(), maxZ = max.getZ(); + + auto v0 = model * simd_f32_4(min, 1.0f); + auto v1 = model * simd_f32_4(minX, minY, maxZ, 1.0f); + auto v2 = model * simd_f32_4(minX, maxY, minZ, 1.0f); + auto v3 = model * simd_f32_4(minX, maxY, maxZ, 1.0f); + auto v4 = model * simd_f32_4(maxX, minY, minZ, 1.0f); + auto v5 = model * simd_f32_4(maxX, minY, maxZ, 1.0f); + auto v6 = model * simd_f32_4(maxX, maxY, minZ, 1.0f); + auto v7 = model * simd_f32_4(max, 1.0f); + + for (uint8 i = 0; i < planeCount; i++) + { + auto plane = planes[i]; + auto d0 = simd_f32_4(distance3(plane, v0), distance3(plane, v1), distance3(plane, v2), distance3(plane, v3)); + auto d1 = simd_f32_4(distance3(plane, v4), distance3(plane, v5), distance3(plane, v6), distance3(plane, v7)); // TODO: use simd_f32_8? + + if (areAllTrue(d0 < 0.0f & d1 < 0.0f)) + return true; + } + + return false; +} } // namespace math \ No newline at end of file diff --git a/include/math/angles.hpp b/include/math/angles.hpp index b3d4033..4700eb4 100644 --- a/include/math/angles.hpp +++ b/include/math/angles.hpp @@ -37,26 +37,22 @@ static constexpr float radians(float degrees) noexcept { return degrees * ((floa * @brief Converts specified vector in degrees to radians. 
* @param degrees target vector in degrees */ -static constexpr float2 radians(float2 degrees) noexcept -{ - return float2(radians(degrees.x), radians(degrees.y)); -} +static constexpr float2 radians(float2 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); } /** * @brief Converts specified vector in degrees to radians. * @param degrees target vector in degrees */ -static constexpr float3 radians(float3 degrees) noexcept -{ - return float3(radians(degrees.x), radians(degrees.y), radians(degrees.z)); -} +static constexpr float3 radians(float3 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); } /** * @brief Converts specified vector in degrees to radians. * @param degrees target vector in degrees */ -static constexpr float4 radians(float4 degrees) noexcept -{ - return float4(radians(degrees.x), radians(degrees.y), radians(degrees.z), radians(degrees.w)); -} +static constexpr float4 radians(float4 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); } +/** + * @brief Converts specified SIMD vector in degrees to radians. + * @param degrees target SIMD vector in degrees + */ +static simd_f32_4 radians(simd_f32_4 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); } /** * @brief Converts specified value in radians to degrees. @@ -67,25 +63,21 @@ static constexpr float degrees(float radians) noexcept { return radians * (180.0 * @brief Converts specified vector in radians to degrees. * @param degrees target vector in radians */ -static constexpr float2 degrees(float2 radians) noexcept -{ - return float2(degrees(radians.x), degrees(radians.y)); -} +static constexpr float2 degrees(float2 radians) noexcept { return radians * (180.0f / (float)M_PI); } /** * @brief Converts specified vector in radians to degrees. 
* @param degrees target vector in radians */ -static constexpr float3 degrees(float3 radians) noexcept -{ - return float3(degrees(radians.x), degrees(radians.y), degrees(radians.z)); -} +static constexpr float3 degrees(float3 radians) noexcept { return radians * (180.0f / (float)M_PI); } /** * @brief Converts specified vector in radians to degrees. * @param degrees target vector in radians */ -static constexpr float4 degrees(float4 radians) noexcept -{ - return float4(degrees(radians.x), degrees(radians.y), degrees(radians.z), degrees(radians.w)); -} +static constexpr float4 degrees(float4 radians) noexcept { return radians * (180.0f / (float)M_PI); } +/** + * @brief Converts specified SIMD vector in radians to degrees. + * @param radians target SIMD vector in radians + */ +static simd_f32_4 degrees(simd_f32_4 radians) noexcept { return radians * (180.0f / (float)M_PI); } }; // namespace math \ No newline at end of file diff --git a/include/math/bvh.hpp b/include/math/bvh.hpp index 96b1c58..9703441 100644 --- a/include/math/bvh.hpp +++ b/include/math/bvh.hpp @@ -38,38 +38,6 @@ namespace math */ struct Bvh { - /** - * @brief BVH hierarchy base node container. - * @details See the @ref Bvh::Node. - */ - struct BaseNode - { - float3 min = float3::zero; - uint32 primitiveCount = 0; - float3 max = float3::zero; - }; - /** - * @brief BVH hierarchy child node container. - * @details See the @ref Bvh::Node. - */ - struct ChildNode - { - float3 min = float3::zero; - uint32 primitiveCount = 0; - float3 max = float3::zero; - uint32 leftNode = 0; - }; - /** - * @brief BVH hierarchy leaf node container. - * @details See the @ref Bvh::Node. - */ - struct LeafNode - { - float3 min = float3::zero; - uint32 primitiveCount = 0; - float3 max = float3::zero; - uint32 firstPrimitive = 0; - }; /** * @brief BVH hierarchy node container. 
* @@ -80,29 +48,57 @@ struct Bvh */ union Node { - BaseNode base; - ChildNode child; - LeafNode leaf; - constexpr Node() : leaf() { } + Aabb aabb = {}; /** - * @brief Returns true if this is leaf node. (Contain the actual geometric objects) + * @brief Returns this leaf BVH node primitive count. */ - bool isLeaf() const noexcept { return base.primitiveCount; } + uint32 getPrimitiveCount() const noexcept { return aabb.getMin().uints.w; } /** - * @brief Returns node Axis Aligned Bounding Box. (AABB) + * @brief Sets this leaf BVH node primitive count. + * @param primitiveCount target leaf node primitive count */ - Aabb getAabb() const noexcept { return Aabb(base.min, base.max); } + void setPrimitiveCount(uint32 primitiveCount) noexcept + { + auto min = aabb.getMin(); + min.uints.w = primitiveCount; + aabb.setMin(min); + } + + /** + * @brief Returns this BVH node left child node identifier. (Raw integer bits of the max W lane) + */ + uint32 getLeftNode() const noexcept { return aabb.getMax().uints.w; } + /** + * @brief Sets this BVH node left child node identifier. + * @param nodeID target node left child node identifier + */ + void setLeftNode(uint32 nodeID) noexcept + { + auto max = aabb.getMax(); + max.uints.w = nodeID; + aabb.setMax(max); + } + /** - * @brief Sets node Axis Aligned Bounding Box. (AABB) - * @param[in] aabb target aabb value + * @brief Returns this leaf BVH node first primitive index. + */ + uint32 getFirstPrimitive() const noexcept { return getLeftNode(); } + /** + * @brief Sets this leaf BVH node first primitive index. + * @param primitiveIndex target node first primitive index + */ + void setFirstPrimitive(uint32 primitiveIndex) noexcept { setLeftNode(primitiveIndex); } + + /** + * @brief Returns true if this is leaf node. 
(Contain the actual geometric objects) */ - void setAabb(const Aabb& aabb) noexcept { base.min = aabb.getMin(), base.max = aabb.getMax(); } + bool isLeaf() const noexcept { return aabb.getMin().uints.w != 0; } }; protected: vector nodes; vector primitives; - vector centroids; + vector centroids; stack nodeStack; public: /******************************************************************************************************************* @@ -117,7 +113,7 @@ struct Bvh * @param centroids precalculated centroid array */ Bvh(const uint8* vertices, const uint8* indices, const Aabb& aabb, uint32 indexCount, - uint32 vertexSize, uint32 indexSize, const float3* centroids = nullptr); + uint32 vertexSize, uint32 indexSize, const simd_f32_4* centroids = nullptr); /** * @brief Creates a new BVH from the AABB array. * @@ -126,7 +122,7 @@ struct Bvh * @param aabbCount AABB array size * @param centroids precalculated centroid array */ - Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* centroids = nullptr); + Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* centroids = nullptr); /** * @brief Creates a new empty BVH. */ @@ -143,7 +139,7 @@ struct Bvh /** * @brief Returns BVH centroid array. */ - const vector& getCentroids() const noexcept { return centroids; } + const vector& getCentroids() const noexcept { return centroids; } /** * @brief Recreates BVH from the triangle array. @@ -157,7 +153,7 @@ struct Bvh * @param centroids precalculated centroid array or null */ void recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb, uint32 indexCount, - uint32 vertexSize, uint32 indexSize, const float3* centroids = nullptr); + uint32 vertexSize, uint32 indexSize, const simd_f32_4* centroids = nullptr); /** * @brief Recreates BVH from the AABB array. 
* @@ -166,7 +162,7 @@ struct Bvh * @param aabbCount AABB array size * @param centroids precalculated centroid array or null */ - void recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* centroids = nullptr); + void recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* centroids = nullptr); // TODO: add traverse function }; diff --git a/include/math/color-space.hpp b/include/math/color-space.hpp index 5447736..17e3d04 100644 --- a/include/math/color-space.hpp +++ b/include/math/color-space.hpp @@ -24,85 +24,90 @@ namespace math { /** - * @brief Converts linear RGB color to the sRGB color space. - * @param rgb target linear RGB color + * @brief Converts linear RGBA color to the sRGB color space. + * @param rgba target linear RGBA color */ -static float3 rgbToSrgb(float3 rgb) noexcept +static simd_f32_4 rgbToSrgb(simd_f32_4 rgba) noexcept { - constexpr auto c = 0.0031308f; - auto l = fma(pow(rgb, float3(1.0f / 2.4f)), float3(1.055f), float3(-0.055f)); - auto h = rgb * 12.92f; - return float3(rgb.x <= c ? h.x : l.x, rgb.y <= c ? h.y : l.y, rgb.z <= c ? h.z : l.z); + static const auto c = simd_f32_4(0.0031308f); + auto h = rgba * 12.92f; + auto l = fma(pow(rgba, simd_f32_4(1.0f / 2.4f)), simd_f32_4(1.055f), simd_f32_4(-0.055f)); + auto r = select(rgba <= c, h, l); r.setW(rgba.getW()); + return r; } /** - * @brief Converts sRGB color to the linear RGB color space. + * @brief Converts sRGB color to the linear RGBA color space. * @param srgb target sRGB color */ -static float3 srgbToRgb(float3 srgb) noexcept +static simd_f32_4 srgbToRgb(simd_f32_4 sRGB) noexcept { - constexpr auto c = 0.04045f; - auto l = pow((srgb + 0.055f) * (1.0f / 1.055f), float3(2.4f)); - auto h = srgb * (1.0f / 12.92f); - return float3(srgb.x <= c ? h.x : l.x, srgb.y <= c ? h.y : l.y, srgb.z <= c ? 
h.z : l.z); + static const auto c = simd_f32_4(0.04045f); + auto h = sRGB * (1.0f / 12.92f); + auto l = pow((sRGB + 0.055f) * (1.0f / 1.055f), simd_f32_4(2.4f)); + auto r = select(sRGB <= c, h, l); r.setW(sRGB.getW()); + return r; } /** * @brief Converts linear RGB color to the XYZ color space. (CIE 1931) - * @param[in] rgb target linear RGB color + * @param rgb target linear RGB color */ -static float3 rgbToXyz(const float3& rgb) noexcept +static simd_f32_4 rgbToXyz(simd_f32_4 rgb) noexcept { - constexpr auto m = float3x3 + static const auto m = simd_f32_4x4 ( - 0.4124564f, 0.2126729f, 0.0193339f, - 0.3575761f, 0.7151522f, 0.1191920f, - 0.1804375f, 0.0721750f, 0.9503041f + simd_f32_4(0.4124564f, 0.2126729f, 0.0193339f, 0.0f), + simd_f32_4(0.3575761f, 0.7151522f, 0.1191920f, 0.0f), + simd_f32_4(0.1804375f, 0.0721750f, 0.9503041f, 0.0f), + simd_f32_4::zero ); - return m * rgb; + return multiply3x3(m, rgb); } /** * @brief Converts XYZ color to the linear RGB color space. (CIE 1931) - * @param[in] xyz target XYZ color + * @param xyz target XYZ color */ -static float3 xyzToRgb(const float3& xyz) noexcept +static simd_f32_4 xyzToRgb(simd_f32_4 xyz) noexcept { - constexpr auto m = float3x3 + static const auto m = simd_f32_4x4 ( - 3.2404542f, -0.9692660f, 0.0556434f, - -1.5371385f, 1.8760108f, -0.2040259f, - -0.4985314f, 0.0415560f, 1.0572252f + simd_f32_4( 3.2404542f, -0.9692660f, 0.0556434f, 0.0f), + simd_f32_4(-1.5371385f, 1.8760108f, -0.2040259f, 0.0f), + simd_f32_4(-0.4985314f, 0.0415560f, 1.0572252f, 0.0f), + simd_f32_4::zero ); - return m * xyz; + return multiply3x3(m, xyz); } /** * @brief Converts XYZ color to the YXY color space. 
* @param xyz target XYZ color */ -static float3 xyzToYxy(float3 xyz) noexcept +static simd_f32_4 xyzToYxy(simd_f32_4 xyz) noexcept { - auto inv = 1.0f / dot(xyz, float3::one); - return float3(xyz.y, xyz.x * inv, xyz.y * inv); + auto y = xyz.getY(); + auto inv = 1.0f / dot3(xyz, simd_f32_4::one); + return simd_f32_4(y, xyz.getX() * inv, y * inv); } /** * @brief Converts YXY color to the XYZ color space. * @param yxy target YXY color */ -static float3 yxyToXyz(float3 yxy) noexcept +static simd_f32_4 yxyToXyz(simd_f32_4 yxy) noexcept { - return float3(yxy.x * yxy.y / yxy.z, yxy.x, - yxy.x * (1.0f - yxy.y - yxy.z) / yxy.z); + auto x = yxy.getX(), y = yxy.getY(), z = yxy.getZ(); + return simd_f32_4(x * y / z, x, x * (1.0f - y - z) / z); } /** * @brief Converts linear RGB color to the YXY color space. - * @param[in] rgb target linear RGB color + * @param rgb target linear RGB color */ -static float3 rgbToYxy(const float3& rgb) noexcept { return xyzToYxy(rgbToXyz(rgb)); } +static simd_f32_4 rgbToYxy(simd_f32_4 rgb) noexcept { return xyzToYxy(rgbToXyz(rgb)); } /** * @brief Converts YXY color to the linear RGB color space. - * @param[in] yxy target YXY color + * @param yxy target YXY color */ -static float3 yxyToRgb(const float3& yxy) noexcept { return xyzToRgb(yxyToXyz(yxy)); } +static simd_f32_4 yxyToRgb(simd_f32_4 yxy) noexcept { return xyzToRgb(yxyToXyz(yxy)); } } // namespace math \ No newline at end of file diff --git a/include/math/color.hpp b/include/math/color.hpp index 253ec99..95dcbcd 100644 --- a/include/math/color.hpp +++ b/include/math/color.hpp @@ -69,24 +69,14 @@ struct [[nodiscard]] Color a = toInt32(string(hex.c_str() + 6, 2)); } - /** - * @brief Creates a new sRGB color structure from the normalized R channel. 
(Red) - * @param normR target normalized R and G channel color values - */ - constexpr explicit Color(float normR) - { - r = (uint8)(std::clamp(normR, 0.0f, 1.0f) * 255.0f + 0.5f); - g = b = a = 0; - } /** * @brief Creates a new sRGB color structure from the normalized R and G channels. (Red, Green) * @param normRg target normalized R and G channel color values */ constexpr explicit Color(float2 normRg) { - r = (uint8)(std::clamp(normRg.x, 0.0f, 1.0f) * 255.0f + 0.5f); - g = (uint8)(std::clamp(normRg.y, 0.0f, 1.0f) * 255.0f + 0.5f); - b = a = 0; + auto c = clamp(normRg, float2(0.0f), float2(1.0f)) * 255.0f + 0.5f; + r = (uint8)c.x, g = (uint8)c.y, b = a = 0; } /** * @brief Creates a new sRGB color structure from the normalized RGB channels. (Red, Green, Blue) @@ -94,10 +84,8 @@ struct [[nodiscard]] Color */ constexpr explicit Color(float3 normRgb) { - r = (uint8)(std::clamp(normRgb.x, 0.0f, 1.0f) * 255.0f + 0.5f); - g = (uint8)(std::clamp(normRgb.y, 0.0f, 1.0f) * 255.0f + 0.5f); - b = (uint8)(std::clamp(normRgb.z, 0.0f, 1.0f) * 255.0f + 0.5f); - a = 0; + auto c = clamp(normRgb, float3(0.0f), float3(1.0f)) * 255.0f + 0.5f; + r = (uint8)c.x, g = (uint8)c.y, b = (uint8)c.z, a = 0; } /** * @brief Creates a new sRGB color structure from the normalized RGBA channels. (Red, Green, Blue, Alpha) @@ -105,36 +93,35 @@ struct [[nodiscard]] Color */ constexpr explicit Color(float4 normRgba) { - r = (uint8)(std::clamp(normRgba.x, 0.0f, 1.0f) * 255.0f + 0.5f); - g = (uint8)(std::clamp(normRgba.y, 0.0f, 1.0f) * 255.0f + 0.5f); - b = (uint8)(std::clamp(normRgba.z, 0.0f, 1.0f) * 255.0f + 0.5f); - a = (uint8)(std::clamp(normRgba.w, 0.0f, 1.0f) * 255.0f + 0.5f); + auto c = clamp(normRgba, float4(0.0f), float4(1.0f)) * 255.0f + 0.5f; + r = (uint8)c.x, g = (uint8)c.y, b = (uint8)c.z, a = (uint8)c.w; + } + /** + * @brief Creates a new sRGB color structure from the normalized RGBA channels. 
(Red, Green, Blue, Alpha) + * @param normRgba target normalized RGBA channel color SIMD values + */ + explicit Color(simd_f32_4 normRgba) + { + auto c = clamp(normRgba, simd_f32_4::zero, simd_f32_4::one) * 255.0f + 0.5f; + r = (uint8)c.getX(), g = (uint8)c.getY(), b = (uint8)c.getZ(), a = (uint8)c.getW(); } /******************************************************************************************************************* * @brief Converts sRGB color to the normalized RG vector. (Red, Green) */ - constexpr explicit operator float2() const noexcept - { - constexpr auto mul = (1.0f / 255.0f); - return float2(r * mul, g * mul); - } + constexpr explicit operator float2() const noexcept { return float2(r, g) * (1.0f / 255.0f); } /** * @brief Converts sRGB color to the normalized RGB vector. (Red, Green, Blue) */ - constexpr explicit operator float3() const noexcept - { - constexpr auto mul = (1.0f / 255.0f); - return float3(r * mul, g * mul, b * mul); - } + constexpr explicit operator float3() const noexcept { return float3(r, g, b) * (1.0f / 255.0f); } /** * @brief Converts sRGB color to the normalized RGBA vector. (Red, Green, Blue, Alpha) */ - constexpr explicit operator float4() const noexcept - { - constexpr auto mul = (1.0f / 255.0f); - return float4(r * mul, g * mul, b * mul, a * mul); - } + constexpr explicit operator float4() const noexcept { return float4(r, g, b, a) * (1.0f / 255.0f); } + /** + * @brief Converts sRGB color to the normalized RGBA SIMD vector. (Red, Green, Blue, Alpha) + */ + explicit operator simd_f32_4() const noexcept { return simd_f32_4(r, g, b, a) * (1.0f / 255.0f); } /** * @brief Returns color binary data. */ @@ -219,31 +206,12 @@ struct [[nodiscard]] Color /** * @brief Converts sRGB color to the normalized linear RGB color space. */ - float4 toLinear4() const noexcept - { - auto srgb = (float4)*this; - return float4(srgbToRgb((float3)srgb), srgb.w); - } - /** - * @brief Converts sRGB color to the normalized linear RGB color space. 
- */ - float3 toLinear3() const noexcept { return srgbToRgb((float3)*this); } + simd_f32_4 toLinear() const noexcept { return srgbToRgb((simd_f32_4)*this); } /** * @brief Converts normalized linear RGB color to the sRGB color space. */ - static Color fromLinear4(const float4& normRGBA) noexcept - { - auto srgb = rgbToSrgb((float3)normRGBA); - return Color(float4(srgb, normRGBA.w)); - } - /** - * @brief Converts normalized linear RGB color to the sRGB color space. - */ - static Color fromLinear3(const float3& normRGB) noexcept - { - return Color(rgbToSrgb(normRGB)); - } + static Color fromLinear(simd_f32_4 normRGBA) noexcept { return Color(rgbToSrgb(normRGBA)); } //****************************************************************************************************************** constexpr bool operator==(Color c) const noexcept { return r == c.r && g == c.g && b == c.b && a == c.a; } diff --git a/include/math/line.hpp b/include/math/line.hpp index 82c6ef7..1dba932 100644 --- a/include/math/line.hpp +++ b/include/math/line.hpp @@ -31,16 +31,16 @@ namespace math struct [[nodiscard]] Line { /** - * @brief Line start point in the space. + * @brief Line start point in 3D space. */ simd_f32_4 start = simd_f32_4::zero; /** - * @brief Line end point in the space. + * @brief Line end point in 3D space. */ simd_f32_4 end = simd_f32_4::zero; /** - * @brief Creates a new line structure. + * @brief Creates a new line structure. (In 3D space) * * @param start target line start point in the space * @param end target line end point in the space @@ -52,7 +52,7 @@ struct [[nodiscard]] Line Line() = default; /** - * @brief Returns line direction vector. + * @brief Returns line direction vector in 3D space. * @param normalize is direction vector should be normalized */ simd_f32_4 getDirection(bool normalize = true) const noexcept @@ -71,10 +71,10 @@ struct [[nodiscard]] Line }; /** - * @brief Returns closest point on line to the specified one. 
+ * @brief Returns closest point on line to the specified one in 3D space. * * @param[in] line target line to use - * @param point target point in the space + * @param point target point in 3D space */ static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point) noexcept { @@ -83,10 +83,10 @@ static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point) noexcept return fma(d, simd_f32_4(std::clamp(t, 0.0f, 1.0f)), a); } /** - * @brief Returns closest point on line to the specified one. + * @brief Returns closest point on line to the specified one in 3D space. * * @param[in] line target line to use - * @param point target point in the space + * @param point target point in 3D space * @param[out] t distance to the closest point */ static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point, float& t) noexcept diff --git a/include/math/matrix/float.hpp b/include/math/matrix/float.hpp index 3e77c20..ed72adf 100644 --- a/include/math/matrix/float.hpp +++ b/include/math/matrix/float.hpp @@ -18,7 +18,7 @@ * * Coordinate system: X-right, Y-up, Z-forward (Left handed) * Matrices order: Column-major. (OpenGL, Vulkan like) - * floatNxM - N columns and M rows + * floatCxR -where C columns and R rows * * Based on this project: https://github.com/g-truc/glm */ @@ -40,9 +40,9 @@ struct [[nodiscard]] float2x2 /** * @brief Creates a new floating point 2x2 matrix structure. - * @param v target value for all columns and rows + * @param n target value for all columns and rows */ - constexpr explicit float2x2(float v = 0.0f) noexcept : c0(v), c1(v) { } + constexpr explicit float2x2(float n = 0.0f) noexcept : c0(n), c1(n) { } /** * @brief Creates a new floating point 2x2 matrix structure. 
* @@ -79,14 +79,14 @@ struct [[nodiscard]] float2x2 return ((float2*)this)[i]; } - constexpr float2x2 operator+(float v) const noexcept { return float2x2(c0 + v, c1 + v); } - constexpr float2x2 operator-(float v) const noexcept { return float2x2(c0 - v, c1 - v); } - constexpr float2x2 operator*(float v) const noexcept { return float2x2(c0 * v, c1 * v); } - constexpr float2x2 operator/(float v) const noexcept { return float2x2(c0 / v, c1 / v); } - float2x2& operator+=(float v) noexcept { c0 += v; c1 += v; return *this; } - float2x2& operator-=(float v) noexcept { c0 -= v; c1 -= v; return *this; } - float2x2& operator*=(float v) noexcept { c0 *= v; c1 *= v; return *this; } - float2x2& operator/=(float v) noexcept { c0 /= v, c1 /= v; return *this; } + constexpr float2x2 operator+(float n) const noexcept { return float2x2(c0 + n, c1 + n); } + constexpr float2x2 operator-(float n) const noexcept { return float2x2(c0 - n, c1 - n); } + constexpr float2x2 operator*(float n) const noexcept { return float2x2(c0 * n, c1 * n); } + constexpr float2x2 operator/(float n) const noexcept { return float2x2(c0 / n, c1 / n); } + float2x2& operator+=(float n) noexcept { c0 += n; c1 += n; return *this; } + float2x2& operator-=(float n) noexcept { c0 -= n; c1 -= n; return *this; } + float2x2& operator*=(float n) noexcept { c0 *= n; c1 *= n; return *this; } + float2x2& operator/=(float n) noexcept { c0 /= n, c1 /= n; return *this; } /** * @brief Calculates dot product between two matrices. @@ -147,9 +147,9 @@ struct [[nodiscard]] float3x3 /** * @brief Creates a new floating point 3x3 matrix structure. - * @param v target value for all columns and rows + * @param n target value for all columns and rows */ - constexpr explicit float3x3(float v = 0.0f) noexcept : c0(v), c1(v), c2(v) { } + constexpr explicit float3x3(float n = 0.0f) noexcept : c0(n), c1(n), c2(n) { } /** * @brief Creates a new floating point 3x3 matrix structure. 
* @@ -194,14 +194,14 @@ struct [[nodiscard]] float3x3 return ((float3*)this)[i]; } - constexpr float3x3 operator+(float v) const noexcept { return float3x3(c0 + v, c1 + v, c2 + v); } - constexpr float3x3 operator-(float v) const noexcept { return float3x3(c0 - v, c1 - v, c2 - v); } - constexpr float3x3 operator*(float v) const noexcept { return float3x3(c0 * v, c1 * v, c2 * v); } - constexpr float3x3 operator/(float v) const noexcept { return float3x3(c0 / v, c1 / v, c2 / v); } - float3x3& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; return *this; } - float3x3& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; return *this; } - float3x3& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; return *this; } - float3x3& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; return *this; } + constexpr float3x3 operator+(float n) const noexcept { return float3x3(c0 + n, c1 + n, c2 + n); } + constexpr float3x3 operator-(float n) const noexcept { return float3x3(c0 - n, c1 - n, c2 - n); } + constexpr float3x3 operator*(float n) const noexcept { return float3x3(c0 * n, c1 * n, c2 * n); } + constexpr float3x3 operator/(float n) const noexcept { return float3x3(c0 / n, c1 / n, c2 / n); } + float3x3& operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; return *this; } + float3x3& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; return *this; } + float3x3& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; return *this; } + float3x3& operator/=(float n) noexcept { c0 /= n, c1 /= n; c2 /= n; return *this; } /** * @brief Calculates dot product between two matrices. @@ -266,15 +266,16 @@ struct [[nodiscard]] float4x3 /** * @brief Creates a new floating point 4x3 matrix structure. 
- * @param v target value for all columns and rows + * @param n target value for all columns and rows */ - constexpr explicit float4x3(float v = 0.0f) noexcept : c0(v), c1(v), c2(v), c3(v) { } + constexpr explicit float4x3(float n = 0.0f) noexcept : c0(n), c1(n), c2(n), c3(n) { } /** * @brief Creates a new floating point 4x3 matrix structure. * * @param[in] c0 first matrix column value * @param[in] c1 second matrix column value * @param[in] c2 third matrix column value + * @param[in] c3 fourth matrix column value */ constexpr float4x3(const float3& c0, const float3& c1, const float3& c2, const float3& c3) noexcept : c0(c0), c1(c1), c2(c2), c3(c3) { } @@ -319,14 +320,14 @@ struct [[nodiscard]] float4x3 return ((float3*)this)[i]; } - constexpr float4x3 operator+(float v) const noexcept { return float4x3(c0 + v, c1 + v, c2 + v, c3 + v); } - constexpr float4x3 operator-(float v) const noexcept { return float4x3(c0 - v, c1 - v, c2 - v, c3 - v); } - constexpr float4x3 operator*(float v) const noexcept { return float4x3(c0 * v, c1 * v, c2 * v, c3 * v); } - constexpr float4x3 operator/(float v) const noexcept { return float4x3(c0 / v, c1 / v, c2 / v, c3 / v); } - float4x3& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; c3 += v; return *this; } - float4x3& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; c3 -= v; return *this; } - float4x3& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; c3 *= v; return *this; } - float4x3& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; c3 /= v; return *this; } + constexpr float4x3 operator+(float n) const noexcept { return float4x3(c0 + n, c1 + n, c2 + n, c3 + n); } + constexpr float4x3 operator-(float n) const noexcept { return float4x3(c0 - n, c1 - n, c2 - n, c3 - n); } + constexpr float4x3 operator*(float n) const noexcept { return float4x3(c0 * n, c1 * n, c2 * n, c3 * n); } + constexpr float4x3 operator/(float n) const noexcept { return float4x3(c0 / n, c1 / n, c2 / n, c3 / n); } + float4x3& 
operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; c3 += n; return *this; } + float4x3& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; c3 -= n; return *this; } + float4x3& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; c3 *= n; return *this; } + float4x3& operator/=(float n) noexcept { c0 /= n, c1 /= n; c2 /= n; c3 /= n; return *this; } /** * @brief Calculates dot product between matrix and vector. @@ -375,9 +376,9 @@ struct [[nodiscard]] float4x4 /** * @brief Creates a new floating point 4x4 matrix structure. - * @param v target value for all columns and rows + * @param n target value for all columns and rows */ - constexpr explicit float4x4(float v = 0.0f) noexcept : c0(v), c1(v), c2(v), c3(v) { } + constexpr explicit float4x4(float n = 0.0f) noexcept : c0(n), c1(n), c2(n), c3(n) { } /** * @brief Creates a new floating point 4x4 matrix structure. * @@ -391,13 +392,14 @@ struct [[nodiscard]] float4x4 /** * @brief Creates a new floating point 4x4 matrix structure. * - * @param[in] c0 first matrix column value - * @param[in] c1 second matrix column value - * @param[in] c2 third matrix column value - * @param[in] c3 fourth matrix column value + * @param[in] m4x3 target 4x3 matrix + * @param c0r3 first matrix column fourth component value + * @param c1r3 second matrix column fourth component value + * @param c2r3 third matrix column fourth component value + * @param c3r3 fourth matrix column fourth component value */ - constexpr float4x4(const float4x3& m4x3, float c0, float c1, float c2, float c3) noexcept : - c0(float4(m4x3.c0, c0)), c1(float4(m4x3.c1, c1)), c2(float4(m4x3.c2, c2)), c3(float4(m4x3.c3, c3)) { } + constexpr float4x4(const float4x3& m4x3, float c0r3, float c1r3, float c2r3, float c3r3) noexcept : + c0(float4(m4x3.c0, c0r3)), c1(float4(m4x3.c1, c1r3)), c2(float4(m4x3.c2, c2r3)), c3(float4(m4x3.c3, c3r3)) { } /** * @brief Creates a new floating point 4x4 matrix structure. * @details See the @ref float4x4. 
@@ -416,7 +418,9 @@ struct [[nodiscard]] float4x4 * @brief Returns matrix 4x3 part. */ constexpr explicit operator float4x3() const noexcept - { return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3); } + { + return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3); + } /** * @brief Returns matrix 3x3 part. */ @@ -445,14 +449,14 @@ struct [[nodiscard]] float4x4 return ((float4*)this)[i]; } - constexpr float4x4 operator+(float v) const noexcept { return float4x4(c0 + v, c1 + v, c2 + v, c3 + v); } - constexpr float4x4 operator-(float v) const noexcept { return float4x4(c0 - v, c1 - v, c2 - v, c3 - v); } - constexpr float4x4 operator*(float v) const noexcept { return float4x4(c0 * v, c1 * v, c2 * v, c3 * v); } - constexpr float4x4 operator/(float v) const noexcept { return float4x4(c0 / v, c1 / v, c2 / v, c3 / v); } - float4x4& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; c3 += v; return *this; } - float4x4& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; c3 -= v; return *this; } - float4x4& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; c3 *= v; return *this; } - float4x4& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; c3 /= v; return *this; } + constexpr float4x4 operator+(float n) const noexcept { return float4x4(c0 + n, c1 + n, c2 + n, c3 + n); } + constexpr float4x4 operator-(float n) const noexcept { return float4x4(c0 - n, c1 - n, c2 - n, c3 - n); } + constexpr float4x4 operator*(float n) const noexcept { return float4x4(c0 * n, c1 * n, c2 * n, c3 * n); } + constexpr float4x4 operator/(float n) const noexcept { return float4x4(c0 / n, c1 / n, c2 / n, c3 / n); } + float4x4& operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; c3 += n; return *this; } + float4x4& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; c3 -= n; return *this; } + float4x4& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; c3 *= n; return *this; } + float4x4& operator/=(float n) noexcept { c0 /= n, c1 /= n; 
c2 /= n; c3 /= n; return *this; } /** * @brief Calculates dot product between two matrices. @@ -513,49 +517,49 @@ inline const float4x4 float4x4::one = float4x4(1.0f); inline const float4x4 float4x4::minusOne = float4x4(-1.0f); //********************************************************************************************************************** -static constexpr float4x4 operator+(float v, const float4x4& m) noexcept +static constexpr float4x4 operator+(float n, const float4x4& m) noexcept { - return float4x4(v + m.c0, v + m.c1, v + m.c2, v + m.c3); + return float4x4(n + m.c0, n + m.c1, n + m.c2, n + m.c3); } -static constexpr float4x4 operator-(float v, const float4x4& m) noexcept +static constexpr float4x4 operator-(float n, const float4x4& m) noexcept { - return float4x4(v - m.c0, v - m.c1, v - m.c2, v - m.c3); + return float4x4(n - m.c0, n - m.c1, n - m.c2, n - m.c3); } -static constexpr float4x4 operator*(float v, const float4x4& m) noexcept +static constexpr float4x4 operator*(float n, const float4x4& m) noexcept { - return float4x4(v * m.c0, v * m.c1, v * m.c2, v * m.c3); + return float4x4(n * m.c0, n * m.c1, n * m.c2, n * m.c3); } -static constexpr float4x4 operator/(float v, const float4x4& m) noexcept +static constexpr float4x4 operator/(float n, const float4x4& m) noexcept { - return float4x4(v / m.c0, v / m.c1, v / m.c2, v / m.c3); + return float4x4(n / m.c0, n / m.c1, n / m.c2, n / m.c3); } -static constexpr float4x3 operator+(float v, const float4x3& m) noexcept +static constexpr float4x3 operator+(float n, const float4x3& m) noexcept { - return float4x3(v + m.c0, v + m.c1, v + m.c2, v + m.c3); + return float4x3(n + m.c0, n + m.c1, n + m.c2, n + m.c3); } -static constexpr float4x3 operator-(float v, const float4x3& m) noexcept +static constexpr float4x3 operator-(float n, const float4x3& m) noexcept { - return float4x3(v - m.c0, v - m.c1, v - m.c2, v - m.c3); + return float4x3(n - m.c0, n - m.c1, n - m.c2, n - m.c3); } -static constexpr float4x3 
operator*(float v, const float4x3& m) noexcept +static constexpr float4x3 operator*(float n, const float4x3& m) noexcept { - return float4x3(v * m.c0, v * m.c1, v * m.c2, v * m.c3); + return float4x3(n * m.c0, n * m.c1, n * m.c2, n * m.c3); } -static constexpr float4x3 operator/(float v, const float4x3& m) noexcept +static constexpr float4x3 operator/(float n, const float4x3& m) noexcept { - return float4x3(v / m.c0, v / m.c1, v / m.c2, v / m.c3); + return float4x3(n / m.c0, n / m.c1, n / m.c2, n / m.c3); } -static constexpr float3x3 operator+(float v, const float3x3& m) noexcept { return float3x3(v + m.c0, v + m.c1, v + m.c2); } -static constexpr float3x3 operator-(float v, const float3x3& m) noexcept { return float3x3(v - m.c0, v - m.c1, v - m.c2); } -static constexpr float3x3 operator*(float v, const float3x3& m) noexcept { return float3x3(v * m.c0, v * m.c1, v * m.c2); } -static constexpr float3x3 operator/(float v, const float3x3& m) noexcept { return float3x3(v / m.c0, v / m.c1, v / m.c2); } +static constexpr float3x3 operator+(float n, const float3x3& m) noexcept { return float3x3(n + m.c0, n + m.c1, n + m.c2); } +static constexpr float3x3 operator-(float n, const float3x3& m) noexcept { return float3x3(n - m.c0, n - m.c1, n - m.c2); } +static constexpr float3x3 operator*(float n, const float3x3& m) noexcept { return float3x3(n * m.c0, n * m.c1, n * m.c2); } +static constexpr float3x3 operator/(float n, const float3x3& m) noexcept { return float3x3(n / m.c0, n / m.c1, n / m.c2); } -static constexpr float2x2 operator+(float v, const float2x2& m) noexcept { return float2x2(v + m.c0, v + m.c1); } -static constexpr float2x2 operator-(float v, const float2x2& m) noexcept { return float2x2(v - m.c0, v - m.c1); } -static constexpr float2x2 operator*(float v, const float2x2& m) noexcept { return float2x2(v * m.c0, v * m.c1); } -static constexpr float2x2 operator/(float v, const float2x2& m) noexcept { return float2x2(v / m.c0, v / m.c1); } +static constexpr float2x2 
operator+(float n, const float2x2& m) noexcept { return float2x2(n + m.c0, n + m.c1); } +static constexpr float2x2 operator-(float n, const float2x2& m) noexcept { return float2x2(n - m.c0, n - m.c1); } +static constexpr float2x2 operator*(float n, const float2x2& m) noexcept { return float2x2(n * m.c0, n * m.c1); } +static constexpr float2x2 operator/(float n, const float2x2& m) noexcept { return float2x2(n / m.c0, n / m.c1); } /*********************************************************************************************************************** * @brief Calculates dot product between vector and matrix. @@ -664,7 +668,7 @@ static constexpr float3x3 inverse(float3x3 m) noexcept } /** * @brief Calculates matrix inverse. (Usefull for undoing transformations) - * @param[in] matrix target matrix to inverse + * @param[in] m target matrix to inverse */ static float4x4 inverse(float4x4 m) noexcept { diff --git a/include/math/matrix/projection.hpp b/include/math/matrix/projection.hpp index 41ddd20..d4cb81e 100644 --- a/include/math/matrix/projection.hpp +++ b/include/math/matrix/projection.hpp @@ -17,10 +17,6 @@ * @brief Common projection matrix functions. * * @details - * Coordinate system: X-right, Y-up, Z-forward (Left handed) - * Matrices order: Column-major. (OpenGL, Vulkan like) - * floatNxM - N columns and M rows - * * Projection matrices are fixed for the Vulkan NDC space. * http://matthewwellings.com/blog/the-new-vulkan-coordinate-system/ * diff --git a/include/math/matrix/transform.hpp b/include/math/matrix/transform.hpp index 0788e10..3d3db81 100644 --- a/include/math/matrix/transform.hpp +++ b/include/math/matrix/transform.hpp @@ -15,17 +15,11 @@ /*********************************************************************************************************************** * @file * @brief Common matrix transformation functions. - * - * @details - * Coordinate system: X-right, Y-up, Z-forward (Left handed) - * Matrices order: Column-major. 
(OpenGL, Vulkan like) - * floatNxM - N columns and M rows - * - * Based on this project: https://github.com/g-truc/glm + * @details Based on this project: https://github.com/g-truc/glm */ #pragma once -#include "math/matrix.hpp" +#include "math/simd/matrix/transform.hpp" namespace math { @@ -49,16 +43,6 @@ static constexpr void setTranslation(float4x4& m, float3 t) noexcept m.c3 = float4(t, m.c3.w); } -/** - * @brief Applies translation transformation to an object in 3D space. (r = m * translate(t)) - * - * @param[in] m target model matrix to translate - * @param t target object translation - */ -static constexpr float4x4 translate(const float4x4& m, float3 t) noexcept -{ - return float4x4(m.c0, m.c1, m.c2, m.c0 * t.x + m.c1 * t.y + m.c2 * t.z + m.c3); -} /** * @brief Creates a new matrix with a specified object position in 3D space. * @param t target object translation @@ -71,22 +55,41 @@ static constexpr float4x4 translate(float3 t) noexcept 0.0f, 0.0f, 1.0f, t.z, 0.0f, 0.0f, 0.0f, 1.0f); } - - - /** - * @brief Adds translation of an object in 3D space to the matrix. + * @brief Applies translation transformation to an object in 3D space. [r = m * translate(t)] * - * @param[out] m target model matrix to use - * @param t target object position to add + * @param[in] m target model matrix to translate + * @param t target object translation */ -static constexpr void addTranslation(float4x4& m, float3 t) noexcept +static constexpr float4x4 translate(const float4x4& m, float3 t) noexcept { - m.c3 = float4((float3)m.c3 + t, m.c3.w); + return float4x4(m.c0, m.c1, m.c2, m.c0 * t.x + m.c1 * t.y + m.c2 * t.z + m.c3); +} +/** + * @brief Applies translation transformation to an object in 3D space. 
[r = translate(t) * m] + * + * @param t target object translation + * @param[in] m target model matrix to translate + */ +static constexpr float4x4 translate(float3 t, const float4x4& m) noexcept +{ + return float4x4(m.c0, m.c1, m.c2, float4(getTranslation(m) + t, m.c3.w)); } /*********************************************************************************************************************** - * @brief Applies scale transformation to an object in 3D space. + * @brief Creates a new matrix with a specified object scale in 3D space. + * @param s target object scale + */ +static constexpr float4x4 scale(float3 s) noexcept +{ + return float4x4( + s.x, 0.0f, 0.0f, 0.0f, + 0.0f, s.y, 0.0f, 0.0f, + 0.0f, 0.0f, s.z, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f); +} +/** + * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)] * * @param[in] m target model matrix to scale * @param s target object scale @@ -96,16 +99,15 @@ static constexpr float4x4 scale(const float4x4& m, float3 s) noexcept return float4x4(m.c0 * s.x, m.c1 * s.y, m.c2 * s.z, m.c3); } /** - * @brief Creates a new matrix with a specified object scale in 3D space. + * @brief Applies scale transformation to an object in 3D space. [r = scale(s) * m] + * + * @param[in] m target model SIMD matrix to scale * @param s target object scale */ -static constexpr float4x4 scale(float3 s) noexcept +static constexpr float4x4 scale(float3 s, const float4x4& m) noexcept { - return float4x4( - s.x, 0.0f, 0.0f, 0.0f, - 0.0f, s.y, 0.0f, 0.0f, - 0.0f, 0.0f, s.z, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f); + auto sm = scale(s); + return float4x4(sm * m.c0, sm * m.c1, sm * m.c2, sm * m.c3); } /** @@ -144,11 +146,11 @@ static constexpr float4x4 rotate(quat q) noexcept /** * @brief Calculates a new rotation matrix from look vectors in 3D space. 
* - * @param[in] lookFrom viewer position in 3D space - * @param[in] lookTo object position in 3D space - * @param[in] up space up direction vector + * @param lookFrom viewer position in 3D space + * @param lookTo object position in 3D space + * @param up space up direction vector */ -static float4x4 rotate(const float3& lookFrom, const float3& lookTo, const float3& up = float3::top) noexcept +static float4x4 rotate(float3 lookFrom, float3 lookTo, float3 up = float3::top) noexcept { auto f = normalize(lookTo - lookFrom); auto s = normalize(cross(up, f)); @@ -163,10 +165,10 @@ static float4x4 rotate(const float3& lookFrom, const float3& lookTo, const float /** * @brief Calculates a new matrix from direction vector in 3D space. * - * @param[in] front view direction vector in 3D space - * @param[in] up space up direction vector + * @param front view direction vector in 3D space + * @param up space up direction vector */ -static float4x4 rotate(const float3& front, const float3& up = float3::top) noexcept +static float4x4 rotate(float3 front, float3 up = float3::top) noexcept { auto f = front; auto s = normalize(cross(up, f)); @@ -191,12 +193,25 @@ static float4x4 extractRotation(const float4x4& m) noexcept auto c2 = (float3)m.c2 * invScale.z; return float4x4(float4(c0, 0.0f), float4(c1, 0.0f), float4(c2, 0.0f), float4(0.0f, 0.0f, 0.0f, 1.0f)); } +/** + * @brief Extracts total matrix rotation transformation of an object in 3D space. + * @warning Matrix should be only translated and/or rotated, without scaling!!! + * @param[in] m target model matrix to extract from + */ +static float4x4 extractRotationOnly(const float4x4& m) noexcept +{ + return float4x4( + float4((float3)m.c0, 0.0f), + float4((float3)m.c1, 0.0f), + float4((float3)m.c2, 0.0f), + float4(0.0f, 0.0f, 0.0f, 1.0f)); +} /** * @brief Extracts total matrix rotation quaternion of an object in 3D space. 
- * @param[in] m target rotation matrix to extract from + * @param m target rotation matrix to extract from */ -static quat extractQuat(const float3x3& m) noexcept +static quat extractQuat(float3x3 m) noexcept { auto fourXSquaredMinus1 = m.c0.x - m.c1.y - m.c2.z; auto fourYSquaredMinus1 = m.c1.y - m.c0.x - m.c2.z; @@ -249,10 +264,11 @@ static quat extractQuat(const float4x4& m) noexcept * @param[in] rotation object rotation in 3D space * @param[in] scale object scale in 3D space */ -static float4x4 calcModel(const float3& position = float3::zero, - const quat& rotation = quat::identity, const float3& scale = float3::one) noexcept +static float4x4 calcModel(float3 position = float3::zero, + quat rotation = quat::identity, float3 scale = float3::one) noexcept { - return translate(position) * rotate(normalize(rotation)) * math::scale(scale); + return scale == float3::one ? translate(position, rotate(normalize(rotation))) : + translate(position) * rotate(normalize(rotation)) * math::scale(scale); } /** * @brief Extracts total matrix position, rotation and scale of an object in 3D space. (Decompose) @@ -262,21 +278,33 @@ static float4x4 calcModel(const float3& position = float3::zero, * @param[out] scale object scale in 3D space * @param[out] rotation object rotation in 3D space */ -static void extractTransform(const float4x4& m, float3& position, float3& scale, quat& rotation) noexcept +static void extractTransform(const float4x4& m, float3& position, quat& rotation, float3& scale) noexcept { position = getTranslation(m); - scale = extractScale(m); rotation = extractQuat(extractRotation(m)); + scale = extractScale(m); +} +/** + * @brief Extracts total matrix position and rotation of an object in 3D space. 
(Decompose) + * + * @param[in] m target model matrix to extract from + * @param[out] position object position in 3D space + * @param[out] rotation object rotation in 3D space + */ +static void extractTransform(const float4x4& m, float3& position, quat& rotation) noexcept +{ + position = getTranslation(m); + rotation = extractQuat(extractRotationOnly(m)); } /*********************************************************************************************************************** * @brief Calculates a new model matrix from look vectors in 3D space. * - * @param[in] from viewer position in 3D space - * @param[in] to object position in 3D space - * @param[in] up space up direction vector + * @param from viewer position in 3D space + * @param to object position in 3D space + * @param up space up direction vector */ -static float4x4 lookAt(const float3& from, const float3& to, const float3& up = float3::top) noexcept +static float4x4 lookAt(float3 from, float3 to, float3 up = float3::top) noexcept { auto f = normalize(to - from); auto s = normalize(cross(up, f)); @@ -290,10 +318,10 @@ static float4x4 lookAt(const float3& from, const float3& to, const float3& up = /** * @brief Calculates a new quaternion from direction vector in 3D space. * - * @param[in] direction viewer direction in 3D space - * @param[in] up space up direction vector + * @param direction viewer direction in 3D space + * @param up space up direction vector */ -static quat lookAtQuat(const float3& direction, const float3& up = float3::top) noexcept +static quat lookAtQuat(float3 direction, float3 up = float3::top) noexcept { auto right = cross(up, direction); auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot(right, right)))); @@ -302,4 +330,16 @@ static quat lookAtQuat(const float3& direction, const float3& up = float3::top) return extractQuat(m); } +/** + * @brief Calculates translated/rotated matrix inverse. + * @warning Matrix should be only translated and/or rotated, without scaling!!! 
+ * @param[in] m target matrix to inverse + */ +static float4x4 inverseTransRot(const float4x4& m) +{ + auto t = transpose(m); + setTranslation(t, -((float3x3)t * getTranslation(m))); + return t; +} + } // namespace math \ No newline at end of file diff --git a/include/math/plane.hpp b/include/math/plane.hpp index 42a0a3a..001e481 100644 --- a/include/math/plane.hpp +++ b/include/math/plane.hpp @@ -38,22 +38,18 @@ struct [[nodiscard]] Plane */ static constexpr uint8 frustumCount = 6; protected: - simd_f32_4 normal = simd_f32_4::zero; + simd_f32_4 normDist = simd_f32_4::zero; public: - /** - * @brief Distance to the plane in 3D space. - */ - float distance = 0.0f; /** - * @brief Creates a new plane structure. + * @brief Creates a new plane structure. (In 3D space) * - * @param normal target plane normal vector + * @param normal target plane normal vector in 3D space * @param distance target distance to the plane * @param normalize is normal vector should be normalized */ Plane(simd_f32_4 normal, float distance = 0.0f, bool normalize = true) noexcept : - normal(normalize ? normalize3(normal) : normal), distance(distance) { } + normDist(normalize ? normalize3(normal) : normal) { normDist.setW(distance); } /** * @brief Creates a new empty plane structure. */ @@ -65,34 +61,42 @@ struct [[nodiscard]] Plane */ Plane(const Triangle& triangle) noexcept { - normal = cross3(triangle.p1 - triangle.p0, triangle.p2 - triangle.p0); + auto normal = cross3(triangle.p1 - triangle.p0, triangle.p2 - triangle.p0); if (length3(normal) > 0.0f) normal = normalize3(normal); - distance = dot3(normal, triangle.p0); + auto distance = dot3(normal, triangle.p0); + normDist = simd_f32_4(normal, distance); } /** - * @brief Returns plane normal vector. + * @brief Returns plane normal vector in 3D space. */ - simd_f32_4 getNormal() const noexcept { return normal; } + const simd_f32_4& getNormal() const noexcept { return normDist; } /** - * @brief Sets plane normal vector. 
+ * @brief Sets plane normal vector in 3D space. * - * @param normal target plane normal vector + * @param normal target plane normal vector in 3D space * @param normalize is normal vector should be normalized */ void setNormal(simd_f32_4 normal, bool normalize = true) noexcept { - this->normal = normalize ? normalize3(normal) : normal; + normDist = simd_f32_4(normalize ? normalize3(normal) : normal, normDist.getW()); } + + /** + * @brief Returns distance to the plane. + */ + float getDistance() const noexcept { return normDist.getW(); } + /** + * @brief Sets distance to the plane. + * @param distance target distance to the plane + */ + void setDistance(float distance) noexcept { return normDist.setW(distance); } + /** * @brief Normalizes plane normal vector. */ - void normalize() noexcept - { - auto l = length3(normal); - normal /= l; distance /= l; - } + void normalize() noexcept { normDist /= length3(normDist); } static const Plane left, right, bottom, top, back, front; }; @@ -112,10 +116,10 @@ inline const Plane Plane::front = Plane(simd_f32_4(0.0f, 0.0f, 1.0f, 0.0f), 0.0f */ static float distance3(const Plane& plane, simd_f32_4 point) noexcept { - return dot3(plane.getNormal(), point) + plane.distance; + return dot3(plane.getNormal(), point) + plane.getDistance(); } /** - * @brief Returns closest point on the plane to specified one. + * @brief Returns closest point on the plane to specified one in 3D space. * * @param[in] plane target plane to use * @param[in] point target point in 3D space @@ -126,7 +130,7 @@ static simd_f32_4 closestPoint(const Plane& plane, simd_f32_4 point) noexcept } /** - * @brief Returns closest point on the triangle to specified one. + * @brief Returns closest point on the triangle to specified one in 3D space. 
* * @param[in] plane target plane to use * @param[in] point target point in 3D space @@ -137,14 +141,14 @@ static simd_f32_4 closestPoint(const Triangle& triangle, simd_f32_4 point) noexc if (isInside(triangle, p)) return p; - auto ab = Line(triangle.p0, triangle.p1),bc = Line(triangle.p1, triangle.p2), ca = Line(triangle.p2, triangle.p0); + auto ab = Line(triangle.p0, triangle.p1), bc = Line(triangle.p1, triangle.p2), ca = Line(triangle.p2, triangle.p0); auto c0 = closestPoint(ab, p), c1 = closestPoint(bc, p), c2 = closestPoint(ca, p); - auto m0 = lengthSq3(p - c0), m1 = lengthSq3(p - c1), m2 = lengthSq3(p - c2); - auto m = min(m0, m1, m2); + auto v = simd_f32_4(lengthSq3(p - c0), lengthSq3(p - c1), lengthSq3(p - c2)); + auto m = min3(v); - if (m == m0) + if (m == v.getX()) return c0; - if (m == m1) + if (m == v.getY()) return c1; return c2; } @@ -158,16 +162,16 @@ static simd_f32_4 closestPoint(const Triangle& triangle, simd_f32_4 point) noexc */ static void extractFrustumPlanes(const simd_f32_4x4& frustum, Plane* planes) noexcept { - auto t = transpose(frustum); + auto t = transpose4x4(frustum); + auto c0W = t.c0.getW(), c1W = t.c1.getW(), c2W = t.c2.getW(), c3W = t.c3.getW(); // Gribb & Hartmann method - planes[0] = Plane(simd_f32_4(frustum.columns[0].getW() + frustum.columns[0].getX(), frustum.columns[1].getW() + frustum.columns[1].getX(), frustum.columns[2].getW() + frustum.columns[2].getX(), 0.0f), frustum.columns[3].getW() + frustum.columns[3].getX(), false); - planes[1] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getX(), frustum.columns[1].getW() - frustum.columns[1].getX(), frustum.columns[2].getW() - frustum.columns[2].getX(), 0.0f), frustum.columns[3].getW() - frustum.columns[3].getX(), false); - planes[2] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getY(), frustum.columns[1].getW() - frustum.columns[1].getY(), frustum.columns[2].getW() - frustum.columns[2].getY(), 0.0f), frustum.columns[3].getW() - 
frustum.columns[3].getY(), false); // Flipped Vulkan NDC! - planes[3] = Plane(simd_f32_4(frustum.columns[0].getW() + frustum.columns[0].getY(), frustum.columns[1].getW() + frustum.columns[1].getY(), frustum.columns[2].getW() + frustum.columns[2].getY(), 0.0f), frustum.columns[3].getW() + frustum.columns[3].getY(), false); // Flipped Vulkan NDC! - planes[4] = Plane(simd_f32_4(frustum.columns[0].getZ() , frustum.columns[1].getZ() , frustum.columns[2].getZ() , 0.0f), frustum.columns[3].getZ() , false); - planes[5] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getZ(), frustum.columns[1].getW() - frustum.columns[1].getZ(), frustum.columns[2].getW() - frustum.columns[2].getZ(), 0.0f), frustum.columns[3].getW() - frustum.columns[3].getZ(), false); - // TODO: maybe we can optimize this with swizzle or other + planes[0] = Plane(simd_f32_4(t.c3 + t.c0, 0.0f), c3W + c0W, false); + planes[1] = Plane(simd_f32_4(t.c3 - t.c0, 0.0f), c3W - c0W, false); + planes[2] = Plane(simd_f32_4(t.c3 - t.c1, 0.0f), c3W - c1W, false); // Flipped Vulkan NDC! + planes[3] = Plane(simd_f32_4(t.c3 + t.c1, 0.0f), c3W + c1W, false); // Flipped Vulkan NDC! + planes[4] = Plane(simd_f32_4(t.c2, 0.0f), c2W, false); + planes[5] = Plane(simd_f32_4(t.c3 - t.c2, 0.0f), c3W - c2W, false); } /** * @brief Normalizes specified planes. diff --git a/include/math/quaternion.hpp b/include/math/quaternion.hpp index 9f0b054..81ba543 100644 --- a/include/math/quaternion.hpp +++ b/include/math/quaternion.hpp @@ -14,7 +14,7 @@ /*********************************************************************************************************************** * @file - * @brief Common quaternion functions. + * @brief Common floating point quaternion functions. * * @details * Quaternion is a mathematical concept used to represent 3D rotations. 
It consists of four components (one real part @@ -27,7 +27,7 @@ */ #pragma once -#include "math/vector/float.hpp" +#include "math/simd/quaternion.hpp" namespace math { @@ -40,30 +40,31 @@ struct [[nodiscard]] quat : public float4 { /** * @brief Creates a new quaternion structure. - * - * @param x imaginary vector X part. - * @param y imaginary vector Y part. - * @param z imaginary vector Z part. - * @param w real scalar part. + * @note Identity quaternion represents rotation of 0 degrees around all axis. + * @param xyzw quaternion parts vector */ - constexpr quat(float x, float y, float z, float w) noexcept : float4(x, y, z, w) { } + constexpr quat(float4 xyzw = identity) noexcept : float4(xyzw) { } /** - * @brief Creates a new identity quaternion structure. - * @param[in] xyzw quaternion parts vector. + * @brief Creates a new quaternion structure. + * + * @param x imaginary vector X part + * @param y imaginary vector Y part + * @param z imaginary vector Z part + * @param w real scalar part */ - constexpr quat(const float4& xyzw = quat::identity) noexcept : float4(xyzw) { } + constexpr quat(float x, float y, float z, float w) noexcept : float4(x, y, z, w) { } /** * @brief Creates a new quaternion structure from the euler angles. 
(In radians) * @param[in] eulerAngles target euler angles vector */ - quat(const float3& eulerAngles) noexcept + quat(float3 eulerAngles) noexcept { - auto sin = math::sin(eulerAngles * 0.5f); - auto cos = math::cos(eulerAngles * 0.5f); - x = sin.x * cos.y * cos.z - cos.x * sin.y * sin.z; - y = cos.x * sin.y * cos.z + sin.x * cos.y * sin.z; - z = cos.x * cos.y * sin.z - sin.x * sin.y * cos.z; - w = cos.x * cos.y * cos.z + sin.x * sin.y * sin.z; + auto s = sin(eulerAngles * 0.5f); + auto c = cos(eulerAngles * 0.5f); + x = s.x * c.y * c.z - c.x * s.y * s.z; + y = c.x * s.y * c.z + s.x * c.y * s.z; + z = c.x * c.y * s.z - s.x * s.y * c.z; + w = c.x * c.y * c.z + s.x * s.y * s.z; } /** * @brief Creates a new quaternion structure from the angle and axis. (In radians) @@ -73,10 +74,10 @@ struct [[nodiscard]] quat : public float4 */ quat(float angle, float3 axis) noexcept { - auto sinus = std::sin(angle * 0.5f); - x = axis.x * sinus; - y = axis.y * sinus; - z = axis.z * sinus; + auto s = std::sin(angle * 0.5f); + x = axis.x * s; + y = axis.y * s; + z = axis.z * s; w = std::cos(angle * 0.5f); } @@ -94,50 +95,45 @@ struct [[nodiscard]] quat : public float4 } /** * @brief Rotates vector by this quaternion. - * @param[in] v target vector to rotate + * @param v target vector to rotate */ - constexpr float3 operator*(const float3& v) const noexcept + float3 operator*(float3 v) const noexcept { - auto q = float3(x, y, z); - auto cq = math::cross(q, v); - auto ccq = math::cross(q, cq); - return v + ((cq * w) + ccq) * 2.0f; + auto q = (float3)*this; + auto cq = math::cross(q, v), ccq = math::cross(q, cq); + return fma(fma(cq, float3(w), ccq), float3(2.0f), v); } /** * @brief Rotates this quaternion by the specified one. 
- * @param[in] q target quaternion to rotate by + * @param q target quaternion to rotate by */ - quat& operator*=(const quat& q) noexcept { return *this = *this * q; } + quat& operator*=(quat q) noexcept { return *this = *this * q; } /** * @brief Extracts quaternion rotation around X axis. (In radians) */ - float getPitch() const noexcept + float extractPitch() const noexcept { auto yy = 2.0f * (y * z + w * x); auto xx = w * w - x * x - y * y + z * z; // Avoid atan2(0, 0) - handle singularity - Matiis - if (std::abs(xx) <= numeric_limits::epsilon() || - std::abs(yy) <= numeric_limits::epsilon()) - { + if (std::abs(xx) <= FLT_EPSILON || std::abs(yy) <= FLT_EPSILON) return 2.0f * std::atan2(x, w); - } - return std::atan2(yy, xx); } /** * @brief Extracts quaternion around Y axis. (In radians) */ - float getYaw() const noexcept { return std::asin(std::clamp(-2.0f * (x * z - w * y), -1.0f, 1.0f)); } + float extractYaw() const noexcept { return std::asin(std::clamp(-2.0f * (x * z - w * y), -1.0f, 1.0f)); } /** * @brief Extracts quaternion around Z axis. (In radians) */ - float getRoll() const noexcept { return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z); } + float extractRoll() const noexcept { return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z); } /** * @brief Calculates quaternion euler angles. (In radians) */ - float3 toEulerAngles() const noexcept { return float3(getPitch(), getYaw(), getRoll()); } + float3 extractEulerAngles() const noexcept { return float3(extractPitch(), extractYaw(), extractRoll()); } /** * @brief Quaternion with zero rotation. @@ -151,40 +147,32 @@ inline const quat quat::identity = quat(0.0f, 0.0f, 0.0f, 1.0f); * @brief Normalizes quaternion. 
* @param q target quaternion to normalize */ -static quat normalize(quat q) noexcept -{ - auto l = length(q); - assert(l > 0.0f); - auto il = (1.0f / l); - return quat(q.x * il, q.y * il, q.z * il, q.w * il); -} +static quat normalize(quat q) noexcept { return (float4)q * (1.0f / length(q)); } + /** * @brief Quaternion spherical linear interpolation from a to b. * * @param a starting quaternion value - * @param[in] b target quaternion value + * @param b target quaternion value * @param t interpolation value (from 0.0 to 1.0) */ -static quat slerp(quat a, const quat& b, float t) noexcept +static quat slerp(quat a, quat b, float t) noexcept { - auto cosTheta = dot(a, b); - auto c = (float4)b; - + auto cosTheta = dot(a, b); auto c = (float4)b; if (cosTheta < 0.0f) { c = -b; cosTheta = -cosTheta; } - if (cosTheta > 1.0f - numeric_limits::epsilon()) + if (cosTheta > 1.0f - FLT_EPSILON) { - return quat(lerp(a.x, c.x, t), lerp(a.y, c.y, t), lerp(a.z, c.z, t), lerp(a.w, c.w, t)); + return lerp(a, c, t); } else { auto angle = std::acos(cosTheta); - return ((float4)a * std::sin((1.0f - t) * angle) + - c * std::sin(t * angle)) / std::sin(angle); + return ((float4)a * std::sin((1.0f - t) * angle) + c * std::sin(t * angle)) / std::sin(angle); } } @@ -195,16 +183,16 @@ static quat slerp(quat a, const quat& b, float t) noexcept static constexpr quat conjugate(quat q) noexcept { return quat(-q.x, -q.y, -q.z, q.w); } /** * @brief Calculates inverse of the quaternion. - * @param[in] q target quaternion to inverse + * @param q target quaternion to inverse */ -static constexpr quat inverse(const quat& q) noexcept { return conjugate(q) / dot(q, q); } +static constexpr quat inverse(quat q) noexcept { return conjugate(q) / dot(q, q); } /** * @brief Rotates vector by the inversed quaternion. 
* - * @param[in] v target vector to rotate - * @param[in] q target quaternion to use + * @param v target vector to rotate + * @param q target quaternion to use */ -static constexpr float3 operator*(const float3& v, const quat& q) noexcept { return inverse(q) * v; } +static float3 operator*(float3 v, quat q) noexcept { return inverse(q) * v; } } // namespace math \ No newline at end of file diff --git a/include/math/ray.hpp b/include/math/ray.hpp index c0653f5..401e74f 100644 --- a/include/math/ray.hpp +++ b/include/math/ray.hpp @@ -32,6 +32,7 @@ struct [[nodiscard]] Ray { protected: simd_f32_4 direction = simd_f32_4::zero; + simd_u32_4 isParallel = simd_u32_4::zero; public: /** * @brief Ray origin point in 3D space. @@ -39,10 +40,10 @@ struct [[nodiscard]] Ray simd_f32_4 origin = simd_f32_4::zero; /** - * @brief Creates a new ray structure. + * @brief Creates a new ray structure. (In 3D space) * - * @param[in] origin ray origin point in 3D space - * @param[in] direction ray direction vector in 3D space + * @param origin ray origin point in 3D space + * @param direction ray direction vector in 3D space * @param normalize is direction vector should be normalized */ Ray(simd_f32_4 origin, simd_f32_4 direction, bool normalize = true) noexcept : @@ -55,18 +56,24 @@ struct [[nodiscard]] Ray /** * @brief Returns ray direction vector in 3D space. */ - simd_f32_4 getDirection() const noexcept { return direction; } + const simd_f32_4& getDirection() const noexcept { return direction; } /** * @brief Sets ray direction vector in 3D space. * - * @param[in] direction target ray direction vector + * @param direction target ray direction vector * @param normalize is direction vector should be normalized */ void setDirection(simd_f32_4 direction, bool normalize = true) noexcept { this->direction = normalize ? normalize3(direction) : direction; + isParallel = abs(direction) <= simd_f32_4(1.0e-20f); } + /** + * @brief Returns which of the ray direction axes are parallel. 
+ */ + const simd_u32_4& getParallel() const noexcept { return isParallel; } + /** * @brief Normalizes ray direction vector. */ @@ -92,7 +99,7 @@ inline const Ray Ray::back = Ray(simd_f32_4::zero, simd_f32_4(0.0f, 0.0f, -1.0f, inline const Ray Ray::front = Ray(simd_f32_4::zero, simd_f32_4(0.0f, 0.0f, 1.0f, 0.0f), false); /** - * @brief Returns closest point on ray to the specified one. + * @brief Returns closest point on ray to the specified one in 3D space. * * @param[in] ray target ray to use * @param point target point in 3D space diff --git a/include/math/sh.hpp b/include/math/sh.hpp index c6234c9..d6390fa 100644 --- a/include/math/sh.hpp +++ b/include/math/sh.hpp @@ -32,6 +32,8 @@ #include "math/ibl.hpp" #include "math/matrix.hpp" +// TODO: use SIMD where possible, but check if SH results are identical. + namespace math::sh { diff --git a/include/math/simd/matrix/float.hpp b/include/math/simd/matrix/float.hpp index c28f670..fe914b8 100644 --- a/include/math/simd/matrix/float.hpp +++ b/include/math/simd/matrix/float.hpp @@ -19,18 +19,19 @@ #pragma once #include "math/matrix/float.hpp" -#include "math/simd/vector/float.hpp" +#include "math/simd/quaternion.hpp" namespace math { /** * @brief SIMD 32bit floating point 4x4 matrix structure. + * @details Commonly used for basic transformations: translation, scale, rotation, etc. * @note Use it when you know how to implement a faster vectorized code. */ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 { - simd_f32_4 columns[4]; + simd_f32_4 c0, c1, c2, c3; /** * @brief Creates a new zero initialized SIMD 32bit floating point 4x4 matrix structure. 
@@ -45,24 +46,35 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 * @param c3 fourth matrix column value */ simd_f32_4x4(simd_f32_4 c0, simd_f32_4 c1, simd_f32_4 c2, simd_f32_4 c3) noexcept : - columns { c0, c1, c2, c3 } { }; + c0(c0), c1(c1), c2(c2), c3(c3) { } /** * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure. - * @param xyzw target value for all matrix vector components + * @warning This constructor duplicates second column vector to the third column! + * + * @param c0 first matrix column value + * @param c1 second matrix column value + * @param c2 third matrix column value + */ + simd_f32_4x4(simd_f32_4 c0, simd_f32_4 c1, simd_f32_4 c2) noexcept : + c0(c0), c1(c1), c2(c2), c3(c2) { } + /** + * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure. + * @param n target value for all matrix vector components */ - explicit simd_f32_4x4(float v) noexcept : - columns { simd_f32_4(v), simd_f32_4(v), simd_f32_4(v), simd_f32_4(v) } { }; + explicit simd_f32_4x4(float n) noexcept : c0(n), c1(n), c2(n), c3(n) { } /** * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure. - * @param v target 4x4 matrix value + * @param[in] m target 4x4 matrix value */ - explicit simd_f32_4x4(const float4x4& v) noexcept { *this = *((const simd_f32_4x4*)&v); } + explicit simd_f32_4x4(const float4x4& m) noexcept { *this = *((const simd_f32_4x4*)&m); } /** * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure. 
- * @param v target 3x3 matrix value + * + * @param[in] m target 3x3 matrix value + * @param c3 third columns SIMD vector */ - explicit simd_f32_4x4(const float3x3& v, simd_f32_4 c3 = simd_f32_4::zero) noexcept : columns - { (simd_f32_4)float4(v.c0, v.c0.z), (simd_f32_4)float4(v.c1, v.c1.z), (simd_f32_4)float4(v.c2, v.c2.z), c3 } { } + explicit simd_f32_4x4(const float3x3& m, simd_f32_4 c3 = simd_f32_4::zero) noexcept : + c0(float4(m.c0, m.c0.z)), c1(float4(m.c1, m.c1.z)), c2(float4(m.c2, m.c2.z)), c3(c3) { } /** * @brief Returns SIMD matrix column by index. @@ -71,7 +83,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 simd_f32_4& operator[](psize i) noexcept { assert(i <= 3); - return columns[i]; + return ((simd_f32_4*)this)[i]; } /** * @brief Returns SIMD matrix column by index. @@ -80,49 +92,75 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 simd_f32_4 operator[](psize i) const noexcept { assert(i <= 3); - return columns[i]; + return ((simd_f32_4*)this)[i]; + } + + /** + * @brief Returns SIMD matrix as 4x4 floating point matrix. + */ + explicit operator float4x4() const noexcept + { + return *((const float4x4*)this); } + /** + * @brief Returns SIMD matrix as 4x3 floating point matrix. + */ + explicit operator float4x3() const noexcept + { + return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3); + } + /** + * @brief Returns SIMD matrix as 3x3 floating point matrix. + */ + explicit operator float3x3() const noexcept + { + return float3x3((float3)c0, (float3)c1, (float3)c2); + } + /** + * @brief Returns SIMD matrix as 3x3 floating point matrix. + */ + explicit operator float2x2() const noexcept { return float2x2((float2)c0, (float2)c1); } /******************************************************************************************************************* * @brief Adds specified value to the all SIMD matrix columns. 
- * @param v target multiplier value + * @param n target value to add */ - simd_f32_4x4 operator+(float v) const noexcept + simd_f32_4x4 operator+(float n) const noexcept { - auto t = simd_f32_4(v); - return simd_f32_4x4(columns[0] + t, columns[1] + t, columns[2] + t, columns[3] + t); + auto t = simd_f32_4(n); + return simd_f32_4x4(c0 + t, c1 + t, c2 + t, c3 + t); } /** * @brief Subdivides specified value from the all SIMD matrix columns. - * @param v target multiplier value + * @param n target value to subdivide */ - simd_f32_4x4 operator-(float v) const noexcept + simd_f32_4x4 operator-(float n) const noexcept { - auto t = simd_f32_4(v); - return simd_f32_4x4(columns[0] - t, columns[1] - t, columns[2] - t, columns[3] - t); + auto t = simd_f32_4(n); + return simd_f32_4x4(c0 - t, c1 - t, c2 - t, c3 - t); } /** * @brief Multiplies all SIMD matrix columns by the specified value. - * @param v target multiplier value + * @param n target value to multiply by */ - simd_f32_4x4 operator*(float v) const noexcept + simd_f32_4x4 operator*(float n) const noexcept { - auto t = simd_f32_4(v); - return simd_f32_4x4(columns[0] * t, columns[1] * t, columns[2] * t, columns[3] * t); + auto t = simd_f32_4(n); + return simd_f32_4x4(c0 * t, c1 * t, c2 * t, c3 * t); } /** * @brief Divides all SIMD matrix columns by the specified value. - * @param v target multiplier value + * @param n target value to divide by */ - simd_f32_4x4 operator/(float v) const noexcept + simd_f32_4x4 operator/(float n) const noexcept { - auto t = simd_f32_4(v); - return simd_f32_4x4(columns[0] / t, columns[1] / t, columns[2] / t, columns[3] / t); + auto t = simd_f32_4(n); + return simd_f32_4x4(c0 / t, c1 / t, c2 / t, c3 / t); } /** * @brief Calculates dot product between two SIMD matrices. 
- * @param m target SIMD matrix to dot by + * @param[in] m target SIMD matrix to dot by */ simd_f32_4x4 operator*(const simd_f32_4x4& m) const noexcept { @@ -130,25 +168,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 #if defined(MATH_SIMD_SUPPORT_SSE) for (int i = 0; i < 4; ++i) { - auto c = m.columns[i].data; - auto r = _mm_mul_ps(columns[0].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0))); - r = _mm_add_ps(r, _mm_mul_ps(columns[1].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)))); - r = _mm_add_ps(r, _mm_mul_ps(columns[2].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)))); - r = _mm_add_ps(r, _mm_mul_ps(columns[3].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)))); - result.columns[i].data = r; + auto c = m[i].data; + auto r = _mm_mul_ps(c0.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0))); + r = _mm_add_ps(r, _mm_mul_ps(c1.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)))); + r = _mm_add_ps(r, _mm_mul_ps(c2.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)))); + r = _mm_add_ps(r, _mm_mul_ps(c3.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)))); + result[i].data = r; } #elif defined(MATH_SIMD_SUPPORT_NEON) for (int i = 0; i < 4; ++i) { - auto c = m.columns[i].data; - auto r = vmulq_f32(columns[0].data, vdupq_laneq_f32(c, 0)); - r = vmlaq_f32(r, columns[1].data, vdupq_laneq_f32(c, 1)); - r = vmlaq_f32(r, columns[2].data, vdupq_laneq_f32(c, 2)); - r = vmlaq_f32(r, columns[3].data, vdupq_laneq_f32(c, 3)); - result.columns[i].data = r; + auto c = m[i].data; + auto r = vmulq_f32(c0.data, vdupq_laneq_f32(c, 0)); + r = vmlaq_f32(r, c1.data, vdupq_laneq_f32(c, 1)); + r = vmlaq_f32(r, c2.data, vdupq_laneq_f32(c, 2)); + r = vmlaq_f32(r, c3.data, vdupq_laneq_f32(c, 3)); + result[i].data = r; } #else - result = simd_f32_4x4((*((const float4x4*)this) * *((const float4x4*)&m))); + result = simd_f32_4x4((*((const float4x4*)this) * (*((const float4x4*)&m)))); #endif return result; } @@ -159,19 +197,19 @@ struct [[nodiscard]] 
alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 simd_f32_4 operator*(simd_f32_4 v) const noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - auto r = _mm_mul_ps(columns[0].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0))); - r = _mm_add_ps(r, _mm_mul_ps(columns[1].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)))); - r = _mm_add_ps(r, _mm_mul_ps(columns[2].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)))); - r = _mm_add_ps(r, _mm_mul_ps(columns[3].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3)))); + auto r = _mm_mul_ps(c0.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0))); + r = _mm_add_ps(r, _mm_mul_ps(c1.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)))); + r = _mm_add_ps(r, _mm_mul_ps(c2.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)))); + r = _mm_add_ps(r, _mm_mul_ps(c3.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3)))); return r; #elif defined(MATH_SIMD_SUPPORT_NEON) - auto r = vmulq_f32(mCcolumnsol[0].data, vdupq_laneq_f32(v.data, 0)); - r = vmlaq_f32(r, columns[1].data, vdupq_laneq_f32(v.data, 1)); - r = vmlaq_f32(r, columns[2].data, vdupq_laneq_f32(v.data, 2)); - r = vmlaq_f32(r, columns[3].data, vdupq_laneq_f32(v.data, 3)); + auto r = vmulq_f32(c0.data, vdupq_laneq_f32(v.data, 0)); + r = vmlaq_f32(r, c1.data, vdupq_laneq_f32(v.data, 1)); + r = vmlaq_f32(r, c2.data, vdupq_laneq_f32(v.data, 2)); + r = vmlaq_f32(r, c3.data, vdupq_laneq_f32(v.data, 3)); return r; #else - return simd_f32_4((*((const float4x4*)this) * *((const float4*)&v))); + return simd_f32_4((*((const float4x4*)this) * (*((const float4*)&v)))); #endif } @@ -183,12 +221,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4 bool operator==(const simd_f32_4x4& m) const noexcept { - return areAllTrue(compare(columns[0], m.columns[0]) & compare(columns[1], m.columns[1]) & - compare(columns[2], m.columns[2]) & compare(columns[3], m.columns[3])); + return areAllTrue(equal(c0, 
m.c0) & equal(c1, m.c1) & equal(c2, m.c2) & equal(c3, m.c3)); + } + bool operator!=(const simd_f32_4x4& m) const noexcept + { + return areAnyTrue(notEqual(c0, m.c0) | notEqual(c1, m.c1) | notEqual(c2, m.c2) | notEqual(c3, m.c3)); } - bool operator!=(const simd_f32_4x4& m) const noexcept { return !(*this == m); } - static const simd_f32_4x4 zero, one, minusOne, inf, minusInf, nan, identity; + static const simd_f32_4x4 zero, one, minusOne, inf, minusInf, nan, min, max, epsilon, identity; }; //********************************************************************************************************************** @@ -198,6 +238,9 @@ inline const simd_f32_4x4 simd_f32_4x4::minusOne = simd_f32_4x4(-1.0f); inline const simd_f32_4x4 simd_f32_4x4::inf = simd_f32_4x4(INFINITY); inline const simd_f32_4x4 simd_f32_4x4::minusInf = simd_f32_4x4(-INFINITY); inline const simd_f32_4x4 simd_f32_4x4::nan = simd_f32_4x4(NAN); +inline const simd_f32_4x4 simd_f32_4x4::min = simd_f32_4x4(FLT_MIN); +inline const simd_f32_4x4 simd_f32_4x4::max = simd_f32_4x4(FLT_MAX); +inline const simd_f32_4x4 simd_f32_4x4::epsilon = simd_f32_4x4(FLT_EPSILON); inline const simd_f32_4x4 simd_f32_4x4::identity = simd_f32_4x4( simd_f32_4(1.0f, 0.0f, 0.0f, 0.0f), simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f), @@ -208,26 +251,89 @@ inline const simd_f32_4x4 simd_f32_4x4::identity = simd_f32_4x4( /** * @brief Returns true if first SIMD matrix binary representation is less than the second. * - * @param a first SIMD matrix to binary compare - * @param b second SIMD matrix to binary compare + * @param[in] a first SIMD matrix to binary compare + * @param[in] b second SIMD matrix to binary compare */ -static bool isBinaryLess(simd_f32_4x4 a, simd_f32_4x4 b) noexcept { return memcmp(&a, &b, sizeof(simd_f32_4x4)) < 0; } +static bool isBinaryLess(const simd_f32_4x4& a, const simd_f32_4x4& b) noexcept +{ + return memcmp(&a, &b, sizeof(simd_f32_4x4)) < 0; +} + +/** + * @brief Calculates 3x3 dot product between two SIMD matrices. 
+ * @param[in] a first SIMD matrix to use + * @param[in] b second SIMD matrix to use + */ +static simd_f32_4x4 multiply3x3(const simd_f32_4x4& a, const simd_f32_4x4& b) noexcept +{ + simd_f32_4x4 result; + #if defined(MATH_SIMD_SUPPORT_SSE) + for (int i = 0; i < 4; ++i) + { + auto c = b[i].data; + auto r = _mm_mul_ps(a.c0.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0))); + r = _mm_add_ps(r, _mm_mul_ps(a.c1.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)))); + r = _mm_add_ps(r, _mm_mul_ps(a.c2.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)))); + result[i].data = r; + } + #elif defined(MATH_SIMD_SUPPORT_NEON) + for (int i = 0; i < 4; ++i) + { + auto c = b[i].data; + auto r = vmulq_f32(a.c0.data, vdupq_laneq_f32(c, 0)); + r = vmlaq_f32(r, a.c1.data, vdupq_laneq_f32(c, 1)); + r = vmlaq_f32(r, a.c2.data, vdupq_laneq_f32(c, 2)); + result[i].data = r; + } + #else + result = simd_f32_4x4((float3x3)a * (float3x3)b, a.c3); + #endif + return result; +} +/** + * @brief Calculates 3x3 dot product between SIMD matrix and vector. 
+ * @param[in] m target SIMD matrix to use + * @param v target SIMD vector to dot by + */ +static simd_f32_4 multiply3x3(const simd_f32_4x4& m, simd_f32_4 v) noexcept +{ + simd_f32_4 result; + #if defined(MATH_SIMD_SUPPORT_SSE) + auto r = _mm_mul_ps(m.c0.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0))); + r = _mm_add_ps(r, _mm_mul_ps(m.c1.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)))); + r = _mm_add_ps(r, _mm_mul_ps(m.c2.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)))); + result = r; + #elif defined(MATH_SIMD_SUPPORT_NEON) + auto r = vmulq_f32(m.c0.data, vdupq_laneq_f32(v.data, 0)); + r = vmlaq_f32(r, m.c1.data, vdupq_laneq_f32(v.data, 1)); + r = vmlaq_f32(r, m.c2.data, vdupq_laneq_f32(v.data, 2)); + result = r; + #else + auto t = float3x3((float3)m.c0, (float3)m.c1, (float3)m.c2) * (float3)v; + result = simd_f32_4(t.x, t.y, t.z, v.floats.w); + #endif + return result.swizzle(); +} -static simd_f32_4x4 transpose(const simd_f32_4x4& m) +/** + * @brief Calculates 4x4 SIMD matrix transpose. 
(Flips matrix over its diagonal) + * @param[in] m target SIMD matrix to transpose + */ +static simd_f32_4x4 transpose4x4(const simd_f32_4x4& m) noexcept { - #if defined(JPH_USE_SSE) - auto t1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0)); - auto t3 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2)); - auto t2 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(1, 0, 1, 0)); - auto t4 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(3, 2, 3, 2)); + #if defined(MATH_SIMD_SUPPORT_SSE) + auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0)); + auto t3 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2)); + auto t2 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(1, 0, 1, 0)); + auto t4 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(3, 2, 3, 2)); return simd_f32_4x4( _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(3, 1, 3, 1)), _mm_shuffle_ps(t3, t4, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(t3, t4, _MM_SHUFFLE(3, 1, 3, 1))); - #elif defined(JPH_USE_NEON) - auto t1 = vzipq_f32(mCol[0].mValue, mCol[2].mValue); - auto t2 = vzipq_f32(mCol[1].mValue, mCol[3].mValue); + #elif defined(MATH_SIMD_SUPPORT_NEON) + auto t1 = vzipq_f32(m.c0.data, m.c2.data); + auto t2 = vzipq_f32(m.c1.data, m.c3.data); auto t3 = vzipq_f32(t1.val[0], t2.val[0]); auto t4 = vzipq_f32(t1.val[1], t2.val[1]); return simd_f32_4x4(t3.val[0], t3.val[1], t4.val[0], t4.val[1]); @@ -235,30 +341,187 @@ static simd_f32_4x4 transpose(const simd_f32_4x4& m) return simd_f32_4x4(transpose(*((const float4x4*)&m))); #endif } +/** + * @brief Calculates 3x3 SIMD matrix transpose. 
(Flips matrix over its diagonal) + * @param[in] m target SIMD matrix to transpose + */ +static simd_f32_4x4 transpose3x3(const simd_f32_4x4& m) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + auto zero = _mm_setzero_ps(); + auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0)); + auto t3 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2)); + auto t2 = _mm_shuffle_ps(m.c2.data, zero, _MM_SHUFFLE(1, 0, 1, 0)); + auto t4 = _mm_shuffle_ps(m.c2.data, zero, _MM_SHUFFLE(3, 2, 3, 2)); + return simd_f32_4x4( + _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)), + _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(3, 1, 3, 1)), + _mm_shuffle_ps(t3, t4, _MM_SHUFFLE(2, 0, 2, 0)), m.c3); + #elif defined(MATH_SIMD_SUPPORT_NEON) + auto t1 = vzipq_f32(m.c0.data, m.c2.data); + auto t2 = vzipq_f32(m.c1.data, vdupq_n_f32(0)); + auto t3 = vzipq_f32(t1.val[0], t2.val[0]); + auto t4 = vzipq_f32(t1.val[1], t2.val[1]); + return simd_f32_4x4(t3.val[0], t3.val[1], t4.val[0], m.c3); + #else + auto r = transpose((float3x3)m); + return simd_f32_4x4( + simd_f32_4(r.c0.x, r.c0.y, r.c0.z, 0.0f), + simd_f32_4(r.c1.x, r.c1.y, r.c1.z, 0.0f), + simd_f32_4(r.c2.x, r.c2.y, r.c2.z, 0.0f), m.c3); + #endif +} /** - * @brief Calculates 3x3 dot product between SIMD matrix and vector. - * @param[int] m target SIMD matrix to use - * @param v target SIMD vector to dot by + * @brief Calculates 4x4 SIMD matrix inverse. 
(Useful for undoing transformations) + * @param[in] m target SIMD matrix to inverse */ -static simd_f32_4 multiply3x3(const simd_f32_4x4& m, simd_f32_4 v) noexcept +static simd_f32_4x4 inverse4x4(const simd_f32_4x4& m) noexcept { - simd_f32_4 result; #if defined(MATH_SIMD_SUPPORT_SSE) - auto r = _mm_mul_ps(m.columns[0].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0))); - r = _mm_add_ps(r, _mm_mul_ps(m.columns[1].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)))); - r = _mm_add_ps(r, _mm_mul_ps(m.columns[2].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)))); - result = r; + auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0)); + auto r1 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(1, 0, 1, 0)); + auto r0 = _mm_shuffle_ps(t1, r1, _MM_SHUFFLE(2, 0, 2, 0)); + r1 = _mm_shuffle_ps(r1, t1, _MM_SHUFFLE(3, 1, 3, 1)); + t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2)); + auto r3 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(3, 2, 3, 2)); + auto r2 = _mm_shuffle_ps(t1, r3, _MM_SHUFFLE(2, 0, 2, 0)); + r3 = _mm_shuffle_ps(r3, t1, _MM_SHUFFLE(3, 1, 3, 1)); + + t1 = _mm_mul_ps(r2, r3); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + auto m0 = _mm_mul_ps(r1, t1); + auto m1 = _mm_mul_ps(r0, t1); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m0 = _mm_sub_ps(_mm_mul_ps(r1, t1), m0); + m1 = _mm_sub_ps(_mm_mul_ps(r0, t1), m1); + m1 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1, 0, 3, 2)); + + t1 = _mm_mul_ps(r1, r2); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + m0 = _mm_add_ps(_mm_mul_ps(r3, t1), m0); + auto m3 = _mm_mul_ps(r0, t1); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m0 = _mm_sub_ps(m0, _mm_mul_ps(r3, t1)); + m3 = _mm_sub_ps(_mm_mul_ps(r0, t1), m3); + m3 = _mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1, 0, 3, 2)); + + t1 = _mm_mul_ps(_mm_shuffle_ps(r1, r1, _MM_SHUFFLE(1, 0, 3, 2)), r3); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + r2 = 
_mm_shuffle_ps(r2, r2, _MM_SHUFFLE(1, 0, 3, 2)); + m0 = _mm_add_ps(_mm_mul_ps(r2, t1), m0); + auto m2 = _mm_mul_ps(r0, t1); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m0 = _mm_sub_ps(m0, _mm_mul_ps(r2, t1)); + m2 = _mm_sub_ps(_mm_mul_ps(r0, t1), m2); + m2 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1, 0, 3, 2)); + + t1 = _mm_mul_ps(r0, r1); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + m2 = _mm_add_ps(_mm_mul_ps(r3, t1), m2); + m3 = _mm_sub_ps(_mm_mul_ps(r2, t1), m3); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m2 = _mm_sub_ps(_mm_mul_ps(r3, t1), m2); + m3 = _mm_sub_ps(m3, _mm_mul_ps(r2, t1)); + + t1 = _mm_mul_ps(r0, r3); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + m1 = _mm_sub_ps(m1, _mm_mul_ps(r2, t1)); + m2 = _mm_add_ps(_mm_mul_ps(r1, t1), m2); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m1 = _mm_add_ps(_mm_mul_ps(r2, t1), m1); + m2 = _mm_sub_ps(m2, _mm_mul_ps(r1, t1)); + + t1 = _mm_mul_ps(r0, r2); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1)); + m1 = _mm_add_ps(_mm_mul_ps(r3, t1), m1); + m3 = _mm_sub_ps(m3, _mm_mul_ps(r1, t1)); + t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2)); + m1 = _mm_sub_ps(m1, _mm_mul_ps(r3, t1)); + m3 = _mm_add_ps(_mm_mul_ps(r1, t1), m3); + + auto d = _mm_mul_ps(r0, m0); + d = _mm_add_ps(_mm_shuffle_ps(d, d, _MM_SHUFFLE(2, 3, 0, 1)), d); + d = _mm_add_ss(_mm_shuffle_ps(d, d, _MM_SHUFFLE(1, 0, 3, 2)), d); + d = _mm_div_ss(_mm_set_ss(1.0f), d); + d = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0, 0, 0, 0)); + + return simd_f32_4x4(_mm_mul_ps(d, m0), _mm_mul_ps(d, m1), + _mm_mul_ps(d, m2), _mm_mul_ps(d, m3)); #elif defined(MATH_SIMD_SUPPORT_NEON) - auto r = vmulq_f32(m.columns[0].data, vdupq_laneq_f32(v.data, 0)); - r = vmlaq_f32(r, m.columns[1].data, vdupq_laneq_f32(v.data, 1)); - r = vmlaq_f32(r, m.columns[2].data, vdupq_laneq_f32(v.data, 2)); - result = r; + auto t1 = __builtin_shufflevector(m.c0.data, m.c1.data, 0, 1, 4, 5); + auto r1 = __builtin_shufflevector(m.c2.data, 
m.c3.data, 0, 1, 4, 5); + auto r0 = __builtin_shufflevector(t1, r1, 0, 2, 4, 6); + r1 = __builtin_shufflevector(r1, t1, 1, 3, 5, 7); + t1 = __builtin_shufflevector(m.c0.data, m.c1.data, 2, 3, 6, 7); + auto r3 = __builtin_shufflevector(m.c2.data, m.c3.data, 2, 3, 6, 7); + auto r2 = __builtin_shufflevector(t1, r3, 0, 2, 4, 6); + r3 = __builtin_shufflevector(r3, t1, 1, 3, 5, 7); + + t1 = vmulq_f32(r2, r3); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + auto m0 = vmulq_f32(r1, t1); + auto m1 = vmulq_f32(r0, t1); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + m0 = vsubq_f32(vmulq_f32(r1, t1), m0); + m1 = vsubq_f32(vmulq_f32(r0, t1), m1); + m1 = __builtin_shufflevector(m1, m1, 2, 3, 0, 1); + + t1 = vmulq_f32(r1, r2); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + m0 = vaddq_f32(vmulq_f32(r3, t1), m0); + auto m3 = vmulq_f32(r0, t1); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + m0 = vsubq_f32(m0, vmulq_f32(r3, t1)); + m3 = vsubq_f32(vmulq_f32(r0, t1), m3); + m3 = __builtin_shufflevector(m3, m3, 2, 3, 0, 1); + + t1 = __builtin_shufflevector(r1, r1, 2, 3, 0, 1); + t1 = vmulq_f32(t1, r3); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + r2 = __builtin_shufflevector(r2, r2, 2, 3, 0, 1); + m0 = vaddq_f32(vmulq_f32(r2, t1), m0); + auto m2 = vmulq_f32(r0, t1); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + m0 = vsubq_f32(m0, vmulq_f32(r2, t1)); + m2 = vsubq_f32(vmulq_f32(r0, t1), m2); + m2 = __builtin_shufflevector(m2, m2, 2, 3, 0, 1); + + t1 = vmulq_f32(r0, r1); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + m2 = vaddq_f32(vmulq_f32(r3, t1), m2); + m3 = vsubq_f32(vmulq_f32(r2, t1), m3); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + m2 = vsubq_f32(vmulq_f32(r3, t1), m2); + m3 = vsubq_f32(m3, vmulq_f32(r2, t1)); + + t1 = vmulq_f32(r0, r3); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + m1 = vsubq_f32(m1, vmulq_f32(r2, t1)); + m2 = vaddq_f32(vmulq_f32(r1, t1), m2); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + 
m1 = vaddq_f32(vmulq_f32(r2, t1), m1); + m2 = vsubq_f32(m2, vmulq_f32(r1, t1)); + + t1 = vmulq_f32(r0, r2); + t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2); + m1 = vaddq_f32(vmulq_f32(r3, t1), m1); + m3 = vsubq_f32(m3, vmulq_f32(r1, t1)); + t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1); + m1 = vsubq_f32(m1, vmulq_f32(r3, t1)); + m3 = vaddq_f32(vmulq_f32(r1, t1), m3); + + auto d = vmulq_f32(r0, m0); + d = vdupq_n_f32(vaddvq_f32(d)); + d = vdivq_f32(vdupq_n_f32(1.0f), d); + + return simd_f32_4x4(vmulq_f32(d, m0), vmulq_f32(d, m1), + vmulq_f32(d, m2), vmulq_f32(d, m3)); #else - auto t = float3x3((float3)m.columns[0], (float3)m.columns[1], (float3)m.columns[2]) * (float3)v; - result = simd_f32_4(t.x, t.y, t.z, m.w); + return simd_f32_4x4(inverse(*((const float4x4*)&m))); #endif - return result.swizzle(); } +// TODO: inverse of the 3x3 matrix like in the Jolt lib. + } // namespace math \ No newline at end of file diff --git a/include/math/simd/matrix/transform.hpp b/include/math/simd/matrix/transform.hpp index fe812a9..7cb86d6 100644 --- a/include/math/simd/matrix/transform.hpp +++ b/include/math/simd/matrix/transform.hpp @@ -25,26 +25,26 @@ namespace math { /** - * @brief Returns total matrix translation transformation of an object in 3D space. - * @param[in] m target model matrix to extract from + * @brief Returns total SIMD matrix translation transformation of an object in 3D space. + * @param[in] m target model SIMD matrix to extract from */ static simd_f32_4 getTranslation(const simd_f32_4x4& m) noexcept { - return m.columns[3]; + return m.c3; } /** - * @brief Sets total matrix translation transformation of an object in 3D space. + * @brief Sets total SIMD matrix translation transformation of an object in 3D space. 
* - * @param[out] m target model matrix to set + * @param[out] m target model SIMD matrix to set * @param t target object position */ static void setTranslation(simd_f32_4x4& m, simd_f32_4 t) noexcept { - m.columns[3] = simd_f32_4(t, m.columns[3].getW()); + m.c3 = simd_f32_4(t, m.c3.getW()); } /** - * @brief Creates a new matrix with a specified object position in 3D space. + * @brief Creates a new SIMD matrix with a specified object position in 3D space. * @param t target object translation */ static simd_f32_4x4 translate(simd_f32_4 t) noexcept @@ -60,8 +60,7 @@ static simd_f32_4x4 translate(simd_f32_4 t) noexcept */ static simd_f32_4x4 translate(const simd_f32_4x4& m, simd_f32_4 t) noexcept { - return simd_f32_4x4(m.columns[0], m.columns[1], m.columns[2], - simd_f32_4(getTranslation(m) + multiply3x3(m, t), m.columns[3].getW())); + return simd_f32_4x4(m.c0, m.c1, m.c2, simd_f32_4(m.c3 + multiply3x3(m, t), m.c3.getW())); } /** * @brief Applies translation transformation to an object in 3D space. [r = translate(t) * m] @@ -71,12 +70,11 @@ static simd_f32_4x4 translate(const simd_f32_4x4& m, simd_f32_4 t) noexcept */ static simd_f32_4x4 translate(simd_f32_4 t, const simd_f32_4x4& m) noexcept { - return simd_f32_4x4(m.columns[0], m.columns[1], m.columns[2], - simd_f32_4(getTranslation(m) + t, m.columns[3].getW())); + return simd_f32_4x4(m.c0, m.c1, m.c2, simd_f32_4(m.c3 + t, m.c3.getW())); } /*********************************************************************************************************************** - * @brief Creates a new matrix with a specified object scale in 3D space. + * @brief Creates a new SIMD matrix with a specified object scale in 3D space. * @param s target object scale */ static simd_f32_4x4 scale(simd_f32_4 s) noexcept @@ -88,122 +86,130 @@ static simd_f32_4x4 scale(simd_f32_4 s) noexcept simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Applies scale transformation to an object in 3D space. 
[r = scale(s) * m] + * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)] * - * @param[in] m target model matrix to scale + * @param[in] m target model SIMD matrix to scale * @param s target object scale */ -static simd_f32_4x4 scale(simd_f32_4 s, const simd_f32_4x4& m) noexcept +static simd_f32_4x4 scale(const simd_f32_4x4& m, simd_f32_4 s) noexcept { - auto sm = scale(s); - return simd_f32_4x4(sm * m.columns[0], sm * m.columns[1], sm * m.columns[2], sm * m.columns[3]); + return simd_f32_4x4(m.c0 * simd_f32_4(s.getX()), + m.c1 * simd_f32_4(s.getY()), m.c2 * simd_f32_4(s.getZ()), m.c3); } /** - * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)] + * @brief Applies scale transformation to an object in 3D space. [r = scale(s) * m] * - * @param[in] m target model matrix to scale + * @param[in] m target model SIMD matrix to scale * @param s target object scale */ -static simd_f32_4x4 scale(const simd_f32_4x4& m, simd_f32_4 s) noexcept +static simd_f32_4x4 scale(simd_f32_4 s, const simd_f32_4x4& m) noexcept { - return simd_f32_4x4(m.columns[0] * simd_f32_4(s.getX()), - m.columns[1] * simd_f32_4(s.getY()), m.columns[2] * simd_f32_4(s.getZ()), m.columns[3]); + auto sm = scale(s); + return simd_f32_4x4(sm * m.c0, sm * m.c1, sm * m.c2, sm * m.c3); } /** - * @brief Extracts total matrix scale transformation of an object in 3D space. - * @param[in] m target model matrix to extract from + * @brief Extracts total SIMD matrix scale transformation of an object in 3D space. + * @param[in] m target model SIMD matrix to extract from */ static simd_f32_4 extractScale(const simd_f32_4x4& m) noexcept { - return simd_f32_4(length(m.c0), length((float3)m.c1), length((float3)m.c2)); + return simd_f32_4(length3(m.c0), length3(m.c1), length3(m.c2)); } /** - * @brief Extracts total matrix 2D scale transformation of an object in 3D space. 
- * @param[in] m target model matrix to extract from + * @brief Extracts total SIMD matrix 2D scale transformation of an object in 3D space. + * @param[in] m target model SIMD matrix to extract from */ static float2 extractScale2(const simd_f32_4x4& m) noexcept { - return float2(length((float3)m.c0), length((float3)m.c1)); + return float2(length3(m.c0), length3(m.c1)); } /*********************************************************************************************************************** - * @brief Creates a new matrix with a specified object rotation in 3D space. + * @brief Creates a new SIMD matrix with a specified object rotation in 3D space. * @param q target object rotation */ -static simd_f32_4x4 rotate(quat q) noexcept +static simd_f32_4x4 rotate(simd_f32_quat q) noexcept { - auto xx = q.x * q.x, yy = q.y * q.y, zz = q.z * q.z; - auto xz = q.x * q.z, xy = q.x * q.y, yz = q.y * q.z; - auto wx = q.w * q.x, wy = q.w * q.y, wz = q.w * q.z; + auto x = q.getX(), y = q.getY(), z = q.getZ(), w = q.getW(); + auto xx = x * x, yy = y * y, zz = z * z; + auto xz = x * z, xy = x * y, yz = y * z; + auto wx = w * x, wy = w * y, wz = w * z; - return float4x4( - 1.0f - 2.0f * (yy + zz), 2.0f * (xy - wz), 2.0f * (xz + wy), 0.0f, - 2.0f * (xy + wz), 1.0f - 2.0f * (xx + zz), 2.0f * (yz - wx), 0.0f, - 2.0f * (xz - wy), 2.0f * (yz + wx), 1.0f - 2.0f * (xx + yy), 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f); + return simd_f32_4x4( + simd_f32_4(1.0f - 2.0f * (yy + zz), 2.0f * (xy - wz), 2.0f * (xz + wy), 0.0f), + simd_f32_4(2.0f * (xy + wz), 1.0f - 2.0f * (xx + zz), 2.0f * (yz - wx), 0.0f), + simd_f32_4(2.0f * (xz - wy), 2.0f * (yz + wx), 1.0f - 2.0f * (xx + yy), 0.0f), + simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Calculates a new rotation matrix from look vectors in 3D space. + * @brief Calculates a new rotation SIMD matrix from look vectors in 3D space. 
* - * @param[in] lookFrom viewer position in 3D space - * @param[in] lookTo object position in 3D space - * @param[in] up space up direction vector + * @param lookFrom viewer position in 3D space + * @param lookTo object position in 3D space + * @param up space up direction vector */ -static simd_f32_4x4 rotate(const float3& lookFrom, const float3& lookTo, const float3& up = float3::top) noexcept +static simd_f32_4x4 rotate(simd_f32_4 lookFrom, simd_f32_4 lookTo, + simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept { - auto f = normalize(lookTo - lookFrom); - auto s = normalize(cross(up, f)); - auto u = cross(f, s); + auto f = normalize3(lookTo - lookFrom); + auto s = normalize3(cross3(up, f)); + auto u = cross3(f, s); - return float4x4( - s.x, s.y, s.z, 0.0f, - u.x, u.y, u.z, 0.0f, - f.x, f.y, f.z, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f); + return simd_f32_4x4(simd_f32_4(s, 0.0f), simd_f32_4(u, 0.0f), + simd_f32_4(f, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Calculates a new matrix from direction vector in 3D space. + * @brief Calculates a new SIMD matrix from direction vector in 3D space. 
* - * @param[in] front view direction vector in 3D space - * @param[in] up space up direction vector + * @param front view direction vector in 3D space + * @param up space up direction vector */ -static simd_f32_4x4 rotate(const float3& front, const float3& up = float3::top) noexcept +static simd_f32_4x4 rotate(simd_f32_4 front, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept { - auto f = front; - auto s = normalize(cross(up, f)); - auto u = cross(f, s); + auto s = normalize3(cross3(up, front)); + auto u = cross3(front, s); - return float4x4( - s.x, s.y, s.z, 0.0f, - u.x, u.y, u.z, 0.0f, - f.x, f.y, f.z, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f); + return simd_f32_4x4(simd_f32_4(s, 0.0f), simd_f32_4(u, 0.0f), + simd_f32_4(front, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Extracts total matrix rotation transformation of an object in 3D space. - * @param[in] m target model matrix to extract from + * @brief Extracts total SIMD matrix rotation transformation of an object in 3D space. + * @param[in] m target model SIMD matrix to extract from */ static simd_f32_4x4 extractRotation(const simd_f32_4x4& m) noexcept { auto invScale = 1.0f / extractScale(m); - auto c0 = (float3)m.c0 * invScale.x; - auto c1 = (float3)m.c1 * invScale.y; - auto c2 = (float3)m.c2 * invScale.z; - return float4x4(float4(c0, 0.0f), float4(c1, 0.0f), float4(c2, 0.0f), float4(0.0f, 0.0f, 0.0f, 1.0f)); + auto c0 = m.c0 * invScale.getX(); + auto c1 = m.c1 * invScale.getY(); + auto c2 = m.c2 * invScale.getZ(); + + return simd_f32_4x4(simd_f32_4(c0, 0.0f), simd_f32_4(c1, 0.0f), + simd_f32_4(c2, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); +} +/** + * @brief Extracts total SIMD matrix rotation transformation of an object in 3D space. + * @warning Matrix should be only translated and/or rotated, without scaling!!! 
+ * @param[in] m target model SIMD matrix to extract from + */ +static simd_f32_4x4 extractRotationOnly(const simd_f32_4x4& m) noexcept +{ + return simd_f32_4x4(simd_f32_4(m.c0, 0.0f), simd_f32_4(m.c1, 0.0f), + simd_f32_4(m.c2, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Extracts total matrix rotation quaternion of an object in 3D space. - * @param[in] m target rotation matrix to extract from + * @brief Extracts total SIMD matrix rotation quaternion of an object in 3D space. + * @param m target rotation SIMD matrix to extract from */ -static quat extractQuat(const float3x3& m) noexcept +static simd_f32_quat extractQuat(simd_f32_4x4 m) noexcept { - auto fourXSquaredMinus1 = m.c0.x - m.c1.y - m.c2.z; - auto fourYSquaredMinus1 = m.c1.y - m.c0.x - m.c2.z; - auto fourZSquaredMinus1 = m.c2.z - m.c0.x - m.c1.y; - auto fourWSquaredMinus1 = m.c0.x + m.c1.y + m.c2.z; + auto c0X = m.c0.getX(), c1Y = m.c1.getY(), c2Z = m.c2.getZ(); + auto fourXSquaredMinus1 = c0X - c1Y - c2Z; + auto fourYSquaredMinus1 = c1Y - c0X - c2Z; + auto fourZSquaredMinus1 = c2Z - c0X - c1Y; + auto fourWSquaredMinus1 = c0X + c1Y + c2Z; int biggestIndex = 0; auto fourBiggestSquaredMinus1 = fourWSquaredMinus1; @@ -228,80 +234,100 @@ static quat extractQuat(const float3x3& m) noexcept switch (biggestIndex) { - case 0: return quat((m.c1.z - m.c2.y) * mult, (m.c2.x - m.c0.z) * mult, (m.c0.y - m.c1.x) * mult, biggestVal); - case 1: return quat(biggestVal, (m.c0.y + m.c1.x) * mult, (m.c2.x + m.c0.z) * mult, (m.c1.z - m.c2.y) * mult); - case 2: return quat((m.c0.y + m.c1.x) * mult, biggestVal, (m.c1.z + m.c2.y) * mult, (m.c2.x - m.c0.z) * mult); - case 3: return quat((m.c2.x + m.c0.z) * mult, (m.c1.z + m.c2.y) * mult, biggestVal, (m.c0.y - m.c1.x) * mult); + case 0: return simd_f32_quat((m.c1.getZ() - m.c2.getY()) * mult, + (m.c2.getX() - m.c0.getZ()) * mult, (m.c0.getY() - m.c1.getX()) * mult, biggestVal); + case 1: return simd_f32_quat(biggestVal, (m.c0.getY() + m.c1.getX()) * mult, + 
(m.c2.getX() + m.c0.getZ()) * mult, (m.c1.getZ() - m.c2.getY()) * mult); + case 2: return simd_f32_quat((m.c0.getY() + m.c1.getX()) * mult, biggestVal, + (m.c1.getZ() + m.c2.getY()) * mult, (m.c2.getX() - m.c0.getZ()) * mult); + case 3: return simd_f32_quat((m.c2.getX() + m.c0.getZ()) * mult, + (m.c1.getZ() + m.c2.getY()) * mult, biggestVal, (m.c0.getY() - m.c1.getX()) * mult); default: abort(); } } -/** - * @brief Extracts total matrix rotation quaternion of an object in 3D space. - * @param[in] m target rotation matrix to extract from - */ -static quat extractQuat(const simd_f32_4x4& m) noexcept -{ - return extractQuat((float3x3)m); -} /*********************************************************************************************************************** - * @brief Calculates object model matrix from it position, rotation and scale. + * @brief Calculates object model SIMD matrix from it position, rotation and scale. * - * @param[in] position object position in 3D space - * @param[in] rotation object rotation in 3D space - * @param[in] scale object scale in 3D space + * @param position object position in 3D space + * @param rotation object rotation in 3D space + * @param scale object scale in 3D space */ -static simd_f32_4x4 calcModel(const float3& position = float3::zero, - const quat& rotation = quat::identity, const float3& scale = float3::one) noexcept +static simd_f32_4x4 calcModel(simd_f32_4 position = simd_f32_4::zero, + simd_f32_quat rotation = simd_f32_quat::identity, simd_f32_4 scale = simd_f32_4::one) noexcept { - return translate(position) * rotate(normalize(rotation)) * math::scale(scale); + return scale == simd_f32_4::one ? translate(position, rotate(normalize3(rotation))) : + translate(position) * rotate(normalize3(rotation)) * math::scale(scale); } /** - * @brief Extracts total matrix position, rotation and scale of an object in 3D space. (Decompose) + * @brief Extracts total SIMD matrix position, rotation and scale of an object in 3D space. 
(Decompose) * - * @param[in] m target model matrix to extract from + * @param[in] m target model SIMD matrix to extract from * @param[out] position object position in 3D space - * @param[out] scale object scale in 3D space * @param[out] rotation object rotation in 3D space + * @param[out] scale object scale in 3D space */ -static void extractTransform(const simd_f32_4x4& m, float3& position, float3& scale, quat& rotation) noexcept +static void extractTransform(const simd_f32_4x4& m, simd_f32_4& position, + simd_f32_quat& rotation, simd_f32_4& scale) noexcept { position = getTranslation(m); - scale = extractScale(m); rotation = extractQuat(extractRotation(m)); + scale = extractScale(m); +} +/** + * @brief Extracts total SIMD matrix position and rotation of an object in 3D space. (Decompose) + * + * @param[in] m target model SIMD matrix to extract from + * @param[out] position object position in 3D space + * @param[out] rotation object rotation in 3D space + */ +static void extractTransform(const simd_f32_4x4& m, simd_f32_4& position, simd_f32_quat& rotation) noexcept +{ + position = getTranslation(m); + rotation = extractQuat(extractRotationOnly(m)); } /*********************************************************************************************************************** - * @brief Calculates a new model matrix from look vectors in 3D space. + * @brief Calculates a new SIMD model matrix from look vectors in 3D space. 
* * @param[in] from viewer position in 3D space * @param[in] to object position in 3D space * @param[in] up space up direction vector */ -static simd_f32_4x4 lookAt(const float3& from, const float3& to, const float3& up = float3::top) noexcept +static simd_f32_4x4 lookAt(simd_f32_4 from, simd_f32_4 to, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept { - auto f = normalize(to - from); - auto s = normalize(cross(up, f)); - auto u = cross(f, s); - return simd_f32_4x4( - s.x, s.y, s.z, -dot(s, from), - u.x, u.y, u.z, -dot(u, from), - f.x, f.y, f.z, -dot(f, from), - 0.0f, 0.0f, 0.0f, 1.0f); + auto f = normalize3(to - from); + auto s = normalize3(cross3(up, f)); + auto u = cross3(f, s); + + return simd_f32_4x4(simd_f32_4(s, -dot3(s, from)), simd_f32_4(u, -dot3(u, from)), + simd_f32_4(f, -dot3(f, from)), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f)); } /** - * @brief Calculates a new quaternion from direction vector in 3D space. + * @brief Calculates a new SIMD quaternion from direction vector in 3D space. * - * @param[in] direction viewer direction in 3D space - * @param[in] up space up direction vector + * @param direction viewer direction in 3D space + * @param up space up direction vector */ -static quat lookAtQuat(const float3& direction, const float3& up = float3::top) noexcept +static simd_f32_quat lookAtQuat(simd_f32_4 direction, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept { - auto right = cross(up, direction); - auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot(right, right)))); - auto c1 = cross(right, c0); - auto m = float3x3(c0, c1, direction); + auto right = cross3(up, direction); + auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot3(right, right)))); + auto c1 = cross3(right, c0); + auto m = simd_f32_4x4(c0, c1, direction); return extractQuat(m); } +/** + * @brief Calculates translated/rotated SIMD matrix inverse. + * @warning Matrix should be only translated and/or rotated, without scaling!!! 
+ * @param[in] m target SIMD matrix to inverse + */ +static simd_f32_4x4 inverseTransRot(const simd_f32_4x4& m) noexcept +{ + auto t = transpose3x3(m); + setTranslation(t, -multiply3x3(t, getTranslation(m))); + return t; +} + } // namespace math \ No newline at end of file diff --git a/include/math/simd/quaternion.hpp b/include/math/simd/quaternion.hpp new file mode 100644 index 0000000..38b2403 --- /dev/null +++ b/include/math/simd/quaternion.hpp @@ -0,0 +1,197 @@ +// Copyright 2022-2025 Nikita Fediuchin. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*********************************************************************************************************************** + * @file + * @brief Common single instruction multiple data (SIMD) floating point quaternion functions. + */ + +#pragma once +#include "math/simd/vector/float.hpp" + +namespace math +{ + +/** + * @brief SIMD quaternion rotation container. (In 3D space) + * @details Represents rotation using complex numbers, avoiding gimbal lock problem. + */ +struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_quat : public simd_f32_4 +{ + /** + * @brief Creates a new identity SIMD quaternion structure. + * @note Identity quaternion represents rotation of 0 degrees around all axis. + * @param xyzw quaternion parts vector + */ + simd_f32_quat(simd_f32_4 xyzw = identity) noexcept : simd_f32_4(xyzw) { } + /** + * @brief Creates a new SIMD quaternion structure. 
+ * + * @param x imaginary vector X part + * @param y imaginary vector Y part + * @param z imaginary vector Z part + * @param w real scalar part + */ + simd_f32_quat(float x, float y, float z, float w) noexcept : simd_f32_4(x, y, z, w) { } + + /** + * @brief Creates a new SIMD quaternion structure from the angle and axis. (In radians) + * + * @param angle target rotation around axis in radians + * @param axis target 3D axis to rotate around + */ + simd_f32_quat(float angle, simd_f32_4 axis) noexcept + { + simd_f32_4 s, c; + sinCos(simd_f32_4(angle * 0.5f), s, c); + *this = select(simd_u32_4(0, 0, 0, UINT32_MAX), c, axis * s); + } + + /** + * @brief Rotates this SIMD quaternion by the specified one. + * @param q target SIMD quaternion to rotate by + */ + simd_f32_quat operator*(simd_f32_quat q) const noexcept + { + auto x = getX(), y = getY(), z = getZ(), w = getW(); + auto qX = q.getX(), qY = q.getY(), qZ = q.getZ(), qW = q.getW(); + return simd_f32_quat( + w * qX + x * qW + y * qZ - z * qY, + w * qY + y * qW + z * qX - x * qZ, + w * qZ + z * qW + x * qY - y * qX, + w * qW - x * qX - y * qY - z * qZ); + } + /** + * @brief Rotates 3D vector by this SIMD quaternion. + * @param v target 3D vector to rotate + */ + simd_f32_4 operator*(simd_f32_4 v) const noexcept + { + auto cq = cross3(*this, v), ccq = cross3(*this, cq); + return fma(fma(cq, simd_f32_4(getW()), ccq), simd_f32_4(2.0f), v); + } + /** + * @brief Rotates this SIMD quaternion by the specified one. + * @param q target SIMD quaternion to rotate by + */ + simd_f32_quat& operator*=(simd_f32_quat q) noexcept { return *this = *this * q; } + + /** + * @brief Extracts SIMD quaternion rotation around X axis. 
(In radians) + */ + float extractPitch() const noexcept + { + auto x = getX(), y = getY(), z = getZ(), w = getW(); + auto yy = 2.0f * (y * z + w * x); + auto xx = w * w - x * x - y * y + z * z; + + // Avoid atan2(0, 0) - handle singularity - Matiis + if (std::abs(xx) <= FLT_EPSILON || std::abs(yy) <= FLT_EPSILON) + return 2.0f * std::atan2(x, w); + return std::atan2(yy, xx); + } + /** + * @brief Extracts SIMD quaternion around Y axis. (In radians) + */ + float extractYaw() const noexcept + { + return std::asin(std::clamp(-2.0f * (getX() * getZ() - getW() * getY()), -1.0f, 1.0f)); + } + /** + * @brief Extracts SIMD quaternion around Z axis. (In radians) + */ + float extractRoll() const noexcept + { + auto x = getX(), y = getY(), z = getZ(), w = getW(); + return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z); + } + /** + * @brief Calculates SIMD quaternion euler angles. (In radians) + */ + simd_f32_4 extractEulerAngles() const noexcept { return simd_f32_4(extractPitch(), extractYaw(), extractRoll()); } + + static const simd_f32_quat identity; +}; + +//********************************************************************************************************************** +inline const simd_f32_quat simd_f32_quat::identity = simd_f32_quat(0.0f, 0.0f, 0.0f, 1.0f); + +/** + * @brief Creates a new SIMD quaternion structure from the euler angles. (In radians) + * @param eulerAngles target 3D euler angles SIMD vector + */ +static simd_f32_quat fromEulerAngles(simd_f32_4 eulerAngles) noexcept +{ + simd_f32_4 s, c; + sinCos(eulerAngles * 0.5f, s, c); + auto sX = s.getX(), sY = s.getY(), sZ = s.getZ(); + auto cX = c.getX(), cY = c.getY(), cZ = c.getZ(); + return simd_f32_quat( + sX * cY * cZ - cX * sY * sZ, + cX * sY * cZ + sX * cY * sZ, + cX * cY * sZ - sX * sY * cZ, + cX * cY * cZ + sX * sY * sZ); +} + +/** + * @brief SIMD quaternion spherical linear interpolation from a to b. 
+ * + * @param a starting SIMD quaternion value + * @param b target SIMD quaternion value + * @param t interpolation value (from 0.0 to 1.0) + */ +static simd_f32_quat slerp(simd_f32_quat a, simd_f32_quat b, float t) noexcept +{ + auto cosTheta = dot4(a, b); auto c = (simd_f32_4)b; + if (cosTheta < 0.0f) + { + c = -b; + cosTheta = -cosTheta; + } + + if (cosTheta > 1.0f - FLT_EPSILON) + { + return lerp(a, c, t); + } + else + { + auto angle = std::acos(cosTheta); + return ((simd_f32_4)a * std::sin((1.0f - t) * angle) + c * std::sin(t * angle)) / std::sin(angle); + } +} + +/** + * @brief Inverts (undoes) SIMD quaternion rotation. + * @param q target quaternion to conjugate + */ +static simd_f32_quat conjugate(simd_f32_quat q) noexcept +{ + static const auto mask = floatAsUint(simd_f32_4(-0.0f, -0.0f, -0.0f, 0.0f)); + return uintAsFloat(floatAsUint(q) ^ mask); +} +/** + * @brief Calculates inverse of the SIMD quaternion. + * @param q target quaternion to inverse + */ +static simd_f32_quat inverse(simd_f32_quat q) noexcept { return conjugate(q) / dot4(q, q); } + +/** + * @brief Rotates 3D vector by the inversed SIMD quaternion. 
+ * + * @param v target 3D vector to rotate + * @param q target SIMD quaternion to use + */ +static simd_f32_4 operator*(simd_f32_4 v, simd_f32_quat q) noexcept { return inverse(q) * v; } + +} // namespace math \ No newline at end of file diff --git a/include/math/simd/types.hpp b/include/math/simd/types.hpp index c5e83f4..fbc6070 100644 --- a/include/math/simd/types.hpp +++ b/include/math/simd/types.hpp @@ -18,7 +18,6 @@ */ #pragma once -#include "math/vector/float.hpp" #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) diff --git a/include/math/simd/vector/float.hpp b/include/math/simd/vector/float.hpp index 4f55a99..42a9207 100644 --- a/include/math/simd/vector/float.hpp +++ b/include/math/simd/vector/float.hpp @@ -26,15 +26,17 @@ namespace math /** * @brief SIMD 4 component 32bit floating point vector structure. + * @details Commonly used to represent: points, positions, directions, velocities, etc. * @note Use it when you know how to implement a faster vectorized code. */ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 { union { - _simd_f128 data; /** Packed SIMD 4 component 32bit floating point vector data. 
*/ - float4 vec; - + _simd_f128 data; + float4 floats; + int4 ints; + uint4 uints; }; simd_f32_4(const simd_f32_4& v) = default; @@ -50,7 +52,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_f32(0.0f); #else - vec = float4::zero; + floats = float4::zero; #endif } /** @@ -64,7 +66,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_f32(xyzw); #else - vec = float4(xyzw); + floats = float4(xyzw); #endif } /** @@ -82,7 +84,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { x, y, z, w }; #else - vec = float4(x, y, z, w); + floats = float4(x, y, z, w); + #endif + } + /** + * @brief Creates a new SIMD 4 component 32bit floating point vector structure. + * @warning This constructor duplicates Z component to the W component! + * + * @param x first vector component value + * @param y second vector component value + * @param z third vector component value + */ + simd_f32_4(float x, float y, float z) noexcept + { + #if defined(MATH_SIMD_SUPPORT_SSE) + data = _mm_set_ps(z, z, y, x); + #elif defined(MATH_SIMD_SUPPORT_NEON) + data = { x, y, z, z }; + #else + floats = float4(x, y, z, z); #endif } /** @@ -96,11 +116,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #if defined(MATH_SIMD_SUPPORT_SSE4_1) data = _mm_blend_ps(xyz.data, _mm_set1_ps(w), 8); #elif defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_set_ps(w, xyz.vec.z, xyz.vec.y, xyz.vec.x); + data = _mm_set_ps(w, xyz.floats.z, xyz.floats.y, xyz.floats.x); #elif defined(MATH_SIMD_SUPPORT_NEON) data = vsetq_lane_f32(w, xyz.data, 3); #else - vec = xyz.vec; vec.w = w; + floats = xyz.floats; floats.w = w; #endif } @@ -114,30 +134,30 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 /** * @brief Creates a new SIMD 4 component 32bit floating point vector structure. 
- * @param data target vector unsigned integer SIMD data + * @param v target vector unsigned integer SIMD data */ explicit simd_f32_4(simd_u32_4 v) { #if defined(MATH_SIMD_SUPPORT_SSE) - this->data = _mm_cvtepi32_ps(v.data); + data = _mm_cvtepi32_ps(v.data); #elif defined(MATH_SIMD_SUPPORT_NEON) - this->data = vcvtq_f32_u32(v.data); + data = vcvtq_f32_u32(v.data); #else - vec = v.vec; + floats = v.floats; #endif } /** * @brief Creates a new SIMD 4 component 32bit floating point vector structure. - * @param data target vector signed integer SIMD data + * @param v target vector signed integer SIMD data */ explicit simd_f32_4(simd_i32_4 v) { #if defined(MATH_SIMD_SUPPORT_SSE) - this->data = _mm_cvtepi32_ps(v.data); + data = _mm_cvtepi32_ps(v.data); #elif defined(MATH_SIMD_SUPPORT_NEON) - this->data = vcvtq_f32_s32(v.data); + data = vcvtq_f32_s32(v.data); #else - vec = v.vec; + floats = v.floats; #endif } @@ -152,7 +172,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.w }; #else - vec = v; + floats = v; #endif } /** @@ -166,7 +186,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.z }; #else - vec = float4(v, v.z); + floats = float4(v, v.z); #endif } /** @@ -180,7 +200,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.y, v.y }; #else - vec = float4(v, v.y, v.y); + floats = float4(v, v.y, v.y); #endif } /** @@ -194,7 +214,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vld1q_f32((const float*)v); #else - vec = *v; + floats = *v; #endif } @@ -208,7 +228,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcvtq_u32_f32(data); #else - return simd_u32_4((uint4)vec); + return 
simd_u32_4((uint4)floats); #endif } /** @@ -221,107 +241,53 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcvtq_s32_f32(data); #else - return simd_i32_4((uint4)vec); - #endif - } - - /******************************************************************************************************************* - * @brief Casts as 4 component unsigned integer SIMD vector. (Doesn't change the bits) - */ - simd_u32_4 castToUint() const noexcept - { - #if defined(MATH_SIMD_SUPPORT_SSE) - return _mm_castps_si128(data); - #elif defined(MATH_SIMD_SUPPORT_NEON) - return vreinterpretq_u32_f32(data); - #else - return *((const simd_u32_4*)this); - #endif - } - /** - * @brief Casts as 4 component signed integer SIMD vector. (Doesn't change the bits) - */ - simd_i32_4 castToInt() const noexcept - { - #if defined(MATH_SIMD_SUPPORT_SSE) - return _mm_castps_si128(data); - #elif defined(MATH_SIMD_SUPPORT_NEON) - return vreinterpretq_us32_f32(data); - #else - return *((const simd_i32_4*)this); - #endif - } - - /** - * @brief Casts from the 4 component unsigned integer SIMD vector. (Doesn't change the bits) - */ - void castFrom(simd_u32_4 v) noexcept - { - #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_castsi128_ps(v.data); - #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vreinterpretq_f32_u32(v.data); - #else - *this = *((const simd_f32_4*)&v); - #endif - } - /** - * @brief Casts from the 4 component signed integer SIMD vector. 
(Doesn't change the bits) - */ - void castFrom(simd_i32_4 v) noexcept - { - #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_castsi128_ps(v.data); - #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vreinterpretq_f32_s32(v.data); - #else - *this = *((const simd_f32_4*)&v); + return simd_i32_4((uint4)floats); #endif } /******************************************************************************************************************* - * @brief Loads SIMD 4 component 32bit floating point aligned vector value. + * @brief Loads SIMD 4 component 32bit floating point aligned vector values. * @warning Specified vector pointer must be aligned in the memory!!! * @param[in] v target 4 component vector value pointer (aligned) */ - void loadAligned(const float4* v) noexcept + void loadAligned(const float* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_load_ps((const float*)v); + data = _mm_load_ps(v); #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vld1q_f32((const float*)v); + data = vld1q_f32(v); #else - vec = *v; + floats = *((const float4*)v); #endif } /** - * @brief Stores SIMD 4 component 32bit floating point unaligned vector value. + * @brief Stores SIMD 4 component 32bit floating point unaligned vector values. * @param[out] v target 4 component vector value pointer (unaligned) */ - void store(float4* v) noexcept + void store(float* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - _mm_storeu_ps((float*)v, data); + _mm_storeu_ps(v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) - vst1q_f32((float*)v, data); + vst1q_f32(v, data); #else - *v = vec; + *((float4*)v) = floats; #endif } /** - * @brief Stores SIMD 4 component 32bit floating point aligned vector value. + * @brief Stores SIMD 4 component 32bit floating point aligned vector values. * @warning Specified vector pointer must be aligned in the memory!!! 
* @param[out] v target 4 component vector value pointer (aligned) */ - void storeAligned(float4* v) noexcept + void storeAligned(float* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - _mm_store_ps((float*)v, data); + _mm_store_ps(v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) - vst1q_f32((float*)v, data); + vst1q_f32(v, data); #else - *v = vec; + *((float4*)v) = floats; #endif } @@ -335,7 +301,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_f32(data, 0); #else - return vec.x; + return floats.x; #endif } /** @@ -346,7 +312,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_f32(data, 1); #else - return vec.y; + return floats.y; #endif } /** @@ -357,7 +323,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_f32(data, 2); #else - return vec.z; + return floats.z; #endif } /** @@ -368,7 +334,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_f32(data, 3); #else - return vec.w; + return floats.w; #endif } @@ -376,22 +342,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 * @brief Sets SIMD vector first component value. * @param value target X vector component value */ - void setX(float value) noexcept { vec.x = value; } + void setX(float value) noexcept { floats.x = value; } /** * @brief Sets SIMD vector second component value. * @param value target Y vector component value */ - void setY(float value) noexcept { vec.y = value; } + void setY(float value) noexcept { floats.y = value; } /** * @brief Sets SIMD vector third component value. 
* @param value target Z vector component value */ - void setZ(float value) noexcept { vec.z = value; } + void setZ(float value) noexcept { floats.z = value; } /** * @brief Sets SIMD vector fourth component value. * @param value target W vector component value */ - void setW(float value) noexcept { vec.w = value; } + void setW(float value) noexcept { floats.w = value; } /** * @brief Swizzles SIMD vector components. @@ -414,7 +380,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return __builtin_shufflevector(data, data, X, Y, Z, W); #else - return simd_f32_4(vec[X], vec[Y], vec[Z], vec[W]); + return simd_f32_4(floats[X], floats[Y], floats[Z], floats[W]); #endif } @@ -427,25 +393,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 * @brief Returns SIMD vector component by index. * @param i target component index */ - float& operator[](psize i) noexcept { return vec[i]; } + float& operator[](psize i) noexcept { return floats[i]; } /** * @brief Returns SIMD vector component by index. * @param i target component index */ - float operator[](psize i) const noexcept { return vec[i]; } + float operator[](psize i) const noexcept { return floats[i]; } /** * @brief Returns SIMD vector as 4 component floating point vector. (xyzw) */ - explicit operator float4() const noexcept { return vec; } + explicit operator float4() const noexcept { return floats; } /** * @brief Returns SIMD vector as 3 component floating point vector. (xyz) */ - explicit operator float3() const noexcept { return (float3)vec; } + explicit operator float3() const noexcept { return (float3)floats; } /** * @brief Returns SIMD vector as 2 component floating point vector. (xy) */ - explicit operator float2() const noexcept { return (float2)vec; } + explicit operator float2() const noexcept { return (float2)floats; } /** * @brief Returns SIMD first vector component value. 
(x) */ @@ -459,7 +425,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vaddq_f32(data, v.data); #else - return simd_f32_4(vec + v.vec); + return simd_f32_4(floats + v.floats); #endif } simd_f32_4 operator-(simd_f32_4 v) const noexcept @@ -469,7 +435,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vsubq_f32(data, v.data); #else - return simd_f32_4(vec - v.vec); + return simd_f32_4(floats - v.floats); #endif } simd_f32_4 operator*(simd_f32_4 v) const noexcept @@ -479,7 +445,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vmulq_f32(data, v.data); #else - return simd_f32_4(vec * v.vec); + return simd_f32_4(floats * v.floats); #endif } simd_f32_4 operator/(simd_f32_4 v) const noexcept @@ -489,7 +455,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vdivq_f32(data, v.data); #else - return simd_f32_4(vec / v.vec); + return simd_f32_4(floats / v.floats); #endif } simd_f32_4 operator-() const noexcept @@ -499,18 +465,29 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vnegq_f32(data); #else - return simd_f32_4(-vec); + return simd_f32_4(-floats); + #endif + } + simd_f32_4 operator+(float n) const noexcept { return *this + simd_f32_4(n); } + simd_f32_4 operator-(float n) const noexcept { return *this - simd_f32_4(n); } + simd_f32_4 operator*(float n) const noexcept + { + #if defined(MATH_SIMD_SUPPORT_NEON) + return vmulq_n_f32(data, n); + #else + return *this * simd_f32_4(n); #endif } + simd_f32_4 operator/(float n) const noexcept { return *this / simd_f32_4(n); } simd_f32_4& operator+=(simd_f32_4 v) noexcept { *this = *this + v; return *this; } simd_f32_4& operator-=(simd_f32_4 v) noexcept { *this = *this - v; return *this; } 
simd_f32_4& operator*=(simd_f32_4 v) noexcept { *this = *this * v; return *this; } simd_f32_4& operator/=(simd_f32_4 v) noexcept { *this = *this / v; return *this; } - simd_f32_4& operator+=(float n) noexcept { *this = *this + simd_f32_4(n); return *this; } - simd_f32_4& operator-=(float n) noexcept { *this = *this - simd_f32_4(n); return *this; } - simd_f32_4& operator*=(float n) noexcept { *this = *this * simd_f32_4(n); return *this; } - simd_f32_4& operator/=(float n) noexcept { *this = *this / simd_f32_4(n); return *this; } + simd_f32_4& operator+=(float n) noexcept { *this = *this + n; return *this; } + simd_f32_4& operator-=(float n) noexcept { *this = *this - n; return *this; } + simd_f32_4& operator*=(float n) noexcept { *this = *this * n; return *this; } + simd_f32_4& operator/=(float n) noexcept { *this = *this / n; return *this; } simd_f32_4& operator=(float n) noexcept { *this = simd_f32_4(n); return *this; } //****************************************************************************************************************** @@ -521,7 +498,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_u32(vceqq_f32(data, v.data)) == 0xFFFFFFFFu; #else - return vec == v.vec; + return floats == v.floats; #endif } bool operator!=(simd_f32_4 v) const noexcept @@ -531,7 +508,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_u32(vceqq_f32(data, v.data)) == 0u; #else - return vec != v.vec; + return floats != v.floats; #endif } simd_u32_4 operator<(simd_f32_4 v) const noexcept @@ -541,7 +518,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcltq_f32(data, v.data); #else - return simd_u32_4(vec < v.vec); + return simd_u32_4(floats < v.floats); #endif } simd_u32_4 operator>(simd_f32_4 v) const noexcept @@ -551,7 +528,7 @@ struct [[nodiscard]] 
alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgtq_f32(data, v.data); #else - return simd_u32_4(vec > v.vec); + return simd_u32_4(floats > v.floats); #endif } simd_u32_4 operator<=(simd_f32_4 v) const noexcept @@ -561,7 +538,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcleq_f32(data, v.data); #else - return simd_u32_4(vec <= v.vec); + return simd_u32_4(floats <= v.floats); #endif } simd_u32_4 operator>=(simd_f32_4 v) const noexcept @@ -571,23 +548,97 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgeq_f32(data, v.data); #else - return simd_u32_4(vec >= v.vec); + return simd_u32_4(floats >= v.floats); #endif } - static const simd_f32_4 zero, one, minusOne, inf, minusInf, nan; + bool operator==(float n) const noexcept { return *this == simd_f32_4(n); } + bool operator!=(float n) const noexcept { return *this != simd_f32_4(n); } + simd_u32_4 operator<(float n) const noexcept { return *this < simd_f32_4(n); } + simd_u32_4 operator>(float n) const noexcept { return *this > simd_f32_4(n); } + simd_u32_4 operator<=(float n) const noexcept { return *this <= simd_f32_4(n); } + simd_u32_4 operator>=(float n) const noexcept { return *this >= simd_f32_4(n); } + + static const simd_f32_4 zero, one, minusOne, inf, minusInf, nan, min, minusMin, max, minusMax, epsilon; }; +//********************************************************************************************************************** inline const simd_f32_4 simd_f32_4::zero = simd_f32_4(0.0f); inline const simd_f32_4 simd_f32_4::one = simd_f32_4(1.0f); inline const simd_f32_4 simd_f32_4::minusOne = simd_f32_4(-1.0f); inline const simd_f32_4 simd_f32_4::inf = simd_f32_4(INFINITY); inline const simd_f32_4 simd_f32_4::minusInf = simd_f32_4(-INFINITY); inline const simd_f32_4 simd_f32_4::nan = simd_f32_4(NAN); +inline const simd_f32_4 
simd_f32_4::min = simd_f32_4(FLT_MIN); +inline const simd_f32_4 simd_f32_4::minusMin = simd_f32_4(-FLT_MIN); +inline const simd_f32_4 simd_f32_4::max = simd_f32_4(FLT_MAX); +inline const simd_f32_4 simd_f32_4::minusMax = simd_f32_4(-FLT_MAX); +inline const simd_f32_4 simd_f32_4::epsilon = simd_f32_4(FLT_EPSILON); + +/** + * @brief Casts from the 4 component unsigned integer to floating point SIMD vector. (Doesn't change the bits) + */ +static simd_f32_4 uintAsFloat(simd_u32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_castsi128_ps(v.data); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vreinterpretq_f32_u32(v.data); + #else + return *((const simd_f32_4*)&v); + #endif +} +/** + * @brief Casts from the 4 component signed integer to floating point SIMD vector. (Doesn't change the bits) + */ +static simd_f32_4 intAsFloat(simd_i32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_castsi128_ps(v.data); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vreinterpretq_f32_s32(v.data); + #else + return *((const simd_f32_4*)&v); + #endif +} + +/** + * @brief Casts from the 4 component floating point to unsigned integer SIMD vector. (Doesn't change the bits) + */ +static simd_u32_4 floatAsUint(simd_f32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_castps_si128(v.data); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vreinterpretq_u32_f32(v.data); + #else + return *((const simd_u32_4*)&v); + #endif +} +/** + * @brief Casts from the 4 component floating point to signed integer SIMD vector. 
(Doesn't change the bits) + */ +static simd_i32_4 floatAsInt(simd_f32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_castps_si128(v.data); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vreinterpretq_s32_f32(v.data); + #else + return *((const simd_i32_4*)&v); + #endif +} static simd_f32_4 operator+(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) + v; } static simd_f32_4 operator-(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) - v; } -static simd_f32_4 operator*(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) * v; } +static simd_f32_4 operator*(float n, simd_f32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_NEON) + return vmulq_n_f32(v.data, n); + #else + return simd_f32_4(n) * v; + #endif +} static simd_f32_4 operator/(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) / v; } static bool operator==(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) == v; } static bool operator!=(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) != v; } @@ -602,16 +653,33 @@ static simd_u32_4 operator>=(float n, simd_f32_4 v) noexcept { return simd_f32_4 * @param a first SIMD vector to compare * @param b second SIMD vector to compare */ -static simd_u32_4 compare(simd_f32_4 a, simd_f32_4 b) noexcept +static simd_u32_4 equal(simd_f32_4 a, simd_f32_4 b) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - return simd_f32_4(_mm_cmpeq_ps(a.data, b.data)).castToUint(); + return _mm_castps_si128(_mm_cmpeq_ps(a.data, b.data)); #elif defined(MATH_SIMD_SUPPORT_NEON) return vceqq_f32(a.data, b.data); #else - return simd_u32_4(compare(a.vec, b.vec)); + return simd_u32_4(equal(a.floats, b.floats)); #endif } +/** + * @brief Compares two SIMD vectors component wise if they are not equal. 
+ * + * @param a first SIMD vector to compare + * @param b second SIMD vector to compare + */ +static simd_u32_4 notEqual(simd_f32_4 a, simd_f32_4 b) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_castps_si128(_mm_cmpneq_ps(a.data, b.data)); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vmvnq_u32(vceqq_f32(a.data, b.data)); + #else + return simd_u32_4(notEqual(a.floats, b.floats)); + #endif +} + /** * @brief Returns true if first SIMD vector binary representation is less than the second. * @@ -637,7 +705,7 @@ static simd_f32_4 select(simd_u32_4 c, simd_f32_4 t, simd_f32_4 f) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(c.data), 31)), t.data, f.data); #else - return simd_f32_4(select(c.vec, t.vec, f.vec)); + return simd_f32_4(select(c.floats, t.floats, f.floats)); #endif } @@ -654,7 +722,7 @@ static simd_f32_4 min(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vminq_f32(a.data, b.data); #else - return simd_f32_4(min(a.vec, b.vec)); + return simd_f32_4(min(a.floats, b.floats)); #endif } /** @@ -670,31 +738,52 @@ static simd_f32_4 max(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vmaxq_f32(a.data, b.data); #else - return simd_f32_4(max(a.vec, b.vec)); + return simd_f32_4(max(a.floats, b.floats)); #endif } /** - * @brief Returns minimum value of all SIMD vector components. + * @brief Returns minimum value of 4D SIMD vector components. * @param v target SIMD vector */ -static float min(simd_f32_4 v) noexcept +static float min4(simd_f32_4 v) noexcept { auto m = min(v, v.swizzle()); m = min(m, m.swizzle()); return m.getX(); } /** - * @brief Returns maximum value of all SIMD vector components. + * @brief Returns maximum value of 4D SIMD vector components. 
* @param v target SIMD vector */ -static float max(simd_f32_4 v) noexcept +static float max4(simd_f32_4 v) noexcept { auto m = max(v, v.swizzle()); m = max(m, m.swizzle()); return m.getX(); } +/** + * @brief Returns minimum value of 3D SIMD vector components. + * @param v target SIMD vector + */ +static float min3(simd_f32_4 v) noexcept +{ + auto m = min(v, v.swizzle()); + m = min(m, m.swizzle()); + return m.getX(); +} +/** + * @brief Returns maximum value of 3D SIMD vector components. + * @param v target SIMD vector + */ +static float max3(simd_f32_4 v) noexcept +{ + auto m = max(v, v.swizzle()); + m = max(m, m.swizzle()); + return m.getX(); +} + /** * @brief Clamps SIMD vector components between min and max values. * @@ -723,7 +812,7 @@ static simd_f32_4 fma(simd_f32_4 mul1, simd_f32_4 mul2, simd_f32_4 add) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vmlaq_f32(add.data, mul1.data, mul2.data); #else - return simd_f32_4(fma(mul1.vec, mul2.vec, add.vec)); + return simd_f32_4(fma(mul1.floats, mul2.floats, add.floats)); #endif } /** @@ -733,11 +822,14 @@ static simd_f32_4 fma(simd_f32_4 mul1, simd_f32_4 mul2, simd_f32_4 add) noexcept static simd_f32_4 abs(simd_f32_4 v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), v.data), v.data); + if constexpr (std::numeric_limits::is_iec559) + return _mm_and_ps(v.data, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); + else + return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), v.data), v.data); #elif defined(MATH_SIMD_SUPPORT_NEON) return vabsq_f32(v.data); #else - return simd_f32_4(abs(v.vec)); + return simd_f32_4(abs(v.floats)); #endif } /** @@ -751,7 +843,7 @@ static simd_f32_4 sqrt(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vsqrtq_f32(v.data); #else - return simd_f32_4(sqrt(v.vec)); + return simd_f32_4(sqrt(v.floats)); #endif } @@ -767,7 +859,7 @@ static simd_f32_4 sign(simd_f32_4 v) noexcept return 
vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(v.data), vreinterpretq_u32_f32(vdupq_n_f32(-1.0f))), vreinterpretq_u32_f32(vdupq_n_f32(1.0f)))); #else - return simd_f32_4(sign(v.vec)); + return simd_f32_4(sign(v.floats)); #endif } /** @@ -782,7 +874,7 @@ static int signBits(simd_f32_4 v) noexcept auto shift = { 0, 1, 2, 3 }; return vaddvq_u32(vshlq_u32(vshrq_n_u32(vreinterpretq_u32_f32(v.data), 31), shift)); #else - return signBits(v.vec); + return signBits(v.floats); #endif } @@ -797,7 +889,7 @@ static simd_f32_4 round(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vrndnq_f32(v.data); #else - return simd_f32_4(round(v.vec)); + return simd_f32_4(round(v.floats)); #endif } /** @@ -811,7 +903,7 @@ static simd_f32_4 floor(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vrndmq_f32(v.data); #else - return simd_f32_4(floor(v.vec)); + return simd_f32_4(floor(v.floats)); #endif } /** @@ -825,7 +917,7 @@ static simd_f32_4 ceil(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vrndpq_f32(v.data); #else - return simd_f32_4(ceil(v.vec)); + return simd_f32_4(ceil(v.floats)); #endif } /** @@ -839,12 +931,10 @@ static simd_f32_4 trunc(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vrndq_f32(v.data); #else - return simd_f32_4(trunc(v.vec)); + return simd_f32_4(trunc(v.floats)); #endif } -// TODO: implement or use libs with fast sin, cos, pow, etc. functions. - /*********************************************************************************************************************** * @brief Returns dot product between two 4D SIMD vector. 
* @@ -858,7 +948,7 @@ static simd_f32_4 dotV4(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vdupq_n_f32(vaddvq_f32(vmulq_f32(a.data, b.data))); #else - return simd_f32_4(dot(a.vec, b.vec)); + return simd_f32_4(dot(a.floats, b.floats)); #endif } /** @@ -874,7 +964,7 @@ static float dot4(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vaddvq_f32(vmulq_f32(a.data, b.data)); #else - return dot(a.vec, b.vec); + return dot(a.floats, b.floats); #endif } @@ -891,7 +981,7 @@ static simd_f32_4 dotV3(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vdupq_n_f32(vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(a.data, b.data), 3))); #else - return simd_f32_4(dot((float3)a.vec, (float3)b.vec)); + return simd_f32_4(dot((float3)a.floats, (float3)b.floats)); #endif } /** @@ -907,7 +997,7 @@ static float dot3(simd_f32_4 a, simd_f32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(a.data, b.data), 3)); #else - return dot((float3)a.vec, (float3)b.vec); + return dot((float3)a.floats, (float3)b.floats); #endif } @@ -930,8 +1020,8 @@ static simd_f32_4 cross3(simd_f32_4 a, simd_f32_4 b) noexcept vmulq_f32(__builtin_shufflevector(a.data, a.data, 1, 2, 0, 0), b.data)); return __builtin_shufflevector(r, r, 1, 2, 0, 0); #else - auto r = cross((float3)a.vec, (float3)b.vec); - return simd_f32_4(r.x, r.y, r.z, r.z); + auto r = cross((float3)a.floats, (float3)b.floats); + return simd_f32_4(r.x, r.y, r.z); // TODO: not sure if this is the same result as SIMD, check this. 
#endif } @@ -951,7 +1041,7 @@ static float length4(simd_f32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vget_lane_f32(vsqrt_f32(vdup_n_f32(vaddvq_f32(vmulq_f32(v.data, v.data)))), 0); #else - return length(v.vec); + return length(v.floats); #endif } @@ -967,11 +1057,11 @@ static float lengthSq3(simd_f32_4 v) noexcept { return dot3(v, v); } static float length3(simd_f32_4 v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE4_1) - return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.data, v.data, 0xff))); + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.data, v.data, 0x7f))); #elif defined(MATH_SIMD_SUPPORT_NEON) - return vget_lane_f32(vsqrt_f32(vdup_n_f32(vaddvq_f32(vmulq_f32(v.data, v.data)))), 0); + return vget_lane_f32(vsqrt_f32(vdup_n_f32(vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(v.data, v.data), 3)))), 0); // zero the W lane so only XYZ contribute, same as dot3() #else - return length((float3)v.vec); + return length((float3)v.floats); #endif } @@ -1031,9 +1121,230 @@ static simd_f32_4 normalize3(simd_f32_4 v) noexcept return vdivq_f32(v.data, vsqrtq_f32(vdupq_n_f32( vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(v.data, v.data), 3))))); #else - auto r = normalize((float3)v.vec); - return simd_f32_4(r.x, r.y, r.z, r.z); + auto r = normalize((float3)v.floats); + return simd_f32_4(r.x, r.y, r.z); #endif } +/** + * @brief Returns true if SIMD vector is normalized with specified tolerance. + * + * @param v target SIMD vector to check + * @param tolerance floating point precision tolerance + */ +static bool isNormalized4(simd_f32_4 v, float tolerance = 1.0e-6f) noexcept +{ + return std::abs(lengthSq4(v) - 1.0f) <= tolerance; +} +/** + * @brief Returns true if SIMD vector is normalized with specified tolerance.
+ * + * @param v target SIMD vector to check + * @param tolerance floating point precision tolerance + */ +static bool isNormalized3(simd_f32_4 v, float tolerance = 1.0e-6f) noexcept +{ + return std::abs(lengthSq3(v) - 1.0f) <= tolerance; +} + +/*********************************************************************************************************************** + * @brief Returns float remainder of numer/denom for each element of the SIMD vector. + * + * @param n quotient numerator SIMD vector + * @param d quotient denominator SIMD vector + */ +static simd_f32_4 mod(simd_f32_4 n, simd_f32_4 d) noexcept +{ + return n - floor(n / d) * d; +} + +/** + * @brief Returns natural logarithm for each element of the SIMD vector. [r = inv(e^x)] + * @param v target SIMD vector + */ +static simd_f32_4 log(simd_f32_4 v) noexcept +{ + if constexpr (std::numeric_limits<float>::is_iec559) + { + // Based on sleefsimdsp.c from the Sleef library. + auto o = v < simd_f32_4::min; + v = select(o, v * ((float)((int64)1 << 32) * (float)((int64)1 << 32)), v); + + auto e = ((floatAsUint(v * (1.0f / 0.75f)) >> 23u) & 0xFFu) - 0x7Fu; + auto m = intAsFloat(floatAsInt(v) + ((-uintAsInt(e)) << 23)); + e = select(o, e - 64, e); + + auto x = (m - simd_f32_4::one) / (m + simd_f32_4::one); + auto x2 = x * x; + + // Horner form in x2; the five coefficients are ~2/9, 2/7, 2/5, 2/3, 2 of the series for 2*atanh(x) = log(m). + auto t = simd_f32_4(0.2392828464508056640625f); + t = fma(t, x2, simd_f32_4(0.28518211841583251953125f)); + t = fma(t, x2, simd_f32_4(0.400005877017974853515625f)); + t = fma(t, x2, simd_f32_4(0.666666686534881591796875f)); + t = fma(t, x2, simd_f32_4(2.0f)); + + x = fma(x, t, simd_f32_4(0.693147180559945286226764f) * (simd_f32_4)e); + x = select(equal(v, simd_f32_4::inf) | equal(v, simd_f32_4::minusInf), simd_f32_4::inf, x); // log(+inf) = +inf; -inf is turned into NaN by the next select + x = select((v < simd_f32_4::zero) | equal(v, simd_f32_4::nan), simd_f32_4::nan, x); + x = select(equal(v, simd_f32_4::zero), simd_f32_4::minusInf, x); + return x; + } + else + { + return
simd_f32_4(std::log(v.getX()), std::log(v.getY()), + std::log(v.getZ()), std::log(v.getW())); + } +} +/** + * @brief Returns base-e exponential function for each element of the SIMD vector. [r = e^x] + * @param v target SIMD vector + */ +static simd_f32_4 exp(simd_f32_4 v) noexcept +{ + if constexpr (std::numeric_limits<float>::is_iec559) + { + // Based on sleefsimdsp.c from the Sleef library. + auto s = v * M_LOG2E; + auto q = (simd_i32_4)select(s < 0.0f, s - 0.5f, s + 0.5f); + auto qFloat = (simd_f32_4)q; + s = fma(qFloat, simd_f32_4(-0.693145751953125f), v); + s = fma(qFloat, simd_f32_4(-1.428606765330187045e-06f), s); + + auto u = simd_f32_4(0.000198527617612853646278381f); + u = fma(u, s, simd_f32_4(0.00139304355252534151077271f)); + u = fma(u, s, simd_f32_4(0.00833336077630519866943359f)); + u = fma(u, s, simd_f32_4(0.0416664853692054748535156f)); + u = fma(u, s, simd_f32_4(0.166666671633720397949219f)); + u = fma(u, s, simd_f32_4(0.5f)); + + u = fma(s * s, u, s) + simd_f32_4::one; + u = u * intAsFloat(((q >> 1) + 0x7f) << 23) * + intAsFloat(((q - (q >> 1)) + 0x7f) << 23); + + u = uintAsFloat(floatAsUint(u) & (v >= -104.0f)); + u = select(v > 104.0f, simd_f32_4::inf, u); + return u; + } + else + { + return simd_f32_4(std::exp(v.getX()), std::exp(v.getY()), + std::exp(v.getZ()), std::exp(v.getW())); + } +} + +/*********************************************************************************************************************** + * @brief Returns base b raised to the power e for each element of the SIMD vector. + * + * @param b SIMD vector with base values + * @param e SIMD vector with exponent values + */ +static simd_f32_4 pow(simd_f32_4 b, simd_f32_4 e) noexcept +{ + // TODO: use SIMD implementation from the sleefsimdsp.c + return simd_f32_4(std::pow(b.getX(), e.getX()), std::pow(b.getY(), e.getY()), + std::pow(b.getZ(), e.getZ()), std::pow(b.getW(), e.getW())); +} +/** + * @brief Returns base b raised to the power e for each element of the SIMD vector.
(Fast approximation) + * @warning It has lower precision compared to the pow() function! + * + * @param b SIMD vector with base values + * @param e SIMD vector with exponent values + */ +static simd_f32_4 fastPow(simd_f32_4 b, simd_f32_4 e) noexcept +{ + if constexpr (std::numeric_limits<float>::is_iec559) + { + // Based on sleefsimdsp.c from the Sleef library. + auto r = exp(log(abs(b)) * e); + auto eInt = (simd_i32_4)e; auto eAbs = abs(e); + auto eIsInt = equal(e, (simd_f32_4)eInt) | (eAbs >= ((int64)1 << 24)); + auto eIsOdd = notEqual(eInt & 1, simd_i32_4::zero) & eIsInt & (eAbs < ((int64)1 << 24)); + r *= select((b < simd_f32_4::zero) & eIsOdd, simd_f32_4::minusOne, simd_f32_4::one); + r = uintAsFloat(bic(floatAsUint(r), equal(b, simd_f32_4::zero))); + r = select(equal(e, simd_f32_4::zero), simd_f32_4::one, r); + return r; + } + else + { + return pow(b, e); + } +} + +/*********************************************************************************************************************** + * @brief Calculates sine and cosine for each element of the SIMD vector.
+ * + * @param v target SIMD vector to get sin and cos from + * @param[out] sin SIMD vector with sine values + * @param[out] cos SIMD vector with cosine values + */ +static void sinCos(simd_f32_4 v, simd_f32_4& sin, simd_f32_4& cos) noexcept +{ + if constexpr (std::numeric_limits<float>::is_iec559) + { + // Based on sinf.c from the cephes library + auto sinSign = floatAsUint(v) & 0x80000000u; + auto x = uintAsFloat(floatAsUint(v) ^ sinSign); + auto quadrant = (simd_u32_4)fma(simd_f32_4(M_2_PI), x, simd_f32_4(0.5f)); + auto floatQuadrant = (simd_f32_4)quadrant; + x = ((x - floatQuadrant * 1.5703125f) - floatQuadrant * + 0.0004837512969970703125f) - floatQuadrant * 7.549789948768648e-8f; + + auto x2 = x * x; + auto taylorCos = fma((2.443315711809948e-5f * x2 - 1.388731625493765e-3f), x2, + simd_f32_4(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + simd_f32_4::one; + auto taylorSin = (fma(simd_f32_4(-1.9515295891e-4f), x2, + simd_f32_4(8.3321608736e-3f)) * x2 - 1.6666654611e-1f) * x2 * x + x; + + auto bit1 = quadrant << 31u, bit2 = (quadrant << 30u) & 0x80000000u; + auto s = select(bit1, taylorCos, taylorSin); + auto c = select(bit1, taylorSin, taylorCos); + + sinSign ^= bit2; + auto cosSign = bit1 ^ bit2; + sin = uintAsFloat(floatAsUint(s) ^ sinSign); + cos = uintAsFloat(floatAsUint(c) ^ cosSign); + } + else + { + sin = simd_f32_4(std::sin(v.getX()), std::sin(v.getY()), + std::sin(v.getZ()), std::sin(v.getW())); + cos = simd_f32_4(std::cos(v.getX()), std::cos(v.getY()), + std::cos(v.getZ()), std::cos(v.getW())); + } +} + +/*********************************************************************************************************************** + * @brief Remaps each component of SIMD vector to the 0.0 - 1.0 range.
+ * @param v target SIMD vector + */ +static simd_f32_4 repeat(simd_f32_4 v) noexcept +{ + auto r = select(v < 0.0f, 1.0f - mod(-v, simd_f32_4::one), v); + return select(v < 1.0f, r, mod(v, simd_f32_4::one)); +} + +/** + * @brief Linearly interpolates each component of the SIMD vector between a and b using t. + * + * @param a minimum SIMD vector (t == 0.0) + * @param b maximum SIMD vector (t == 1.0) + * @param t target interpolation value (0.0 - 1.0) + */ +static simd_f32_4 lerp(simd_f32_4 a, simd_f32_4 b, float t) noexcept { return a * (1.0f - t) + b * t; } +/** + * @brief Interpolates each component of the SIMD vector from a towards b using factor f, taking into account delta time. + * @note Always use this function instead of basic lerp() when you have variable delta time! + * + * @param a start SIMD vector (returned unchanged when dt == 0.0) + * @param b target SIMD vector (reached when the blend weight 1.0 - pow(f, dt) equals 1.0) + * @param f interpolation factor, the applied blend weight is 1.0 - pow(f, dt) + * @param dt current delta time + */ +static simd_f32_4 lerpDelta(simd_f32_4 a, simd_f32_4 b, float f, float dt) noexcept +{ + return a + (1.0f - std::pow(f, dt)) * (b - a); +} + } // namespace math \ No newline at end of file diff --git a/include/math/simd/vector/int.hpp b/include/math/simd/vector/int.hpp index f24bf8b..e1c3484 100644 --- a/include/math/simd/vector/int.hpp +++ b/include/math/simd/vector/int.hpp @@ -33,7 +33,9 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 union { _simd_i128 data; /** Packed SIMD 4 component 32bit signed integer vector data.
*/ - int4 vec; + int4 ints; + uint4 uints; + float4 floats; }; simd_i32_4(const simd_i32_4& v) = default; @@ -49,7 +51,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_s32(0); #else - vec = int4::zero; + ints = int4::zero; #endif } /** @@ -63,7 +65,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_s32(xyzw); #else - vec = int4(xyzw); + ints = int4(xyzw); #endif } /** @@ -81,7 +83,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { x, y, z, w }; #else - vec = int4(x, y, z, w); + ints = int4(x, y, z, w); + #endif + } + /** + * @brief Creates a new SIMD 4 component 32bit signed integer vector structure. + * @warning This constructor duplicates Z component to the W component! + * + * @param x first vector component value + * @param y second vector component value + * @param z third vector component value + */ + simd_i32_4(int32 x, int32 y, int32 z) noexcept + { + #if defined(MATH_SIMD_SUPPORT_SSE) + data = _mm_set_epi32((int)z, (int)z, (int)y, (int)x); + #elif defined(MATH_SIMD_SUPPORT_NEON) + data = { x, y, z, z }; + #else + ints = int4(x, y, z, z); #endif } /** @@ -92,14 +112,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 */ simd_i32_4(simd_i32_4 xyz, int32 w) noexcept { - #if defined(MATH_SIMD_SUPPORT_SSE4_1) + #if defined(MATH_SIMD_SUPPORT_AVX2) data = _mm_blend_epi32(xyz.data, _mm_set1_epi32(w), 8); #elif defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_set_epi32(w, xyz.vec.z, xyz.vec.y, xyz.vec.x); + data = _mm_set_epi32(w, xyz.ints.z, xyz.ints.y, xyz.ints.x); #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vsetq_lane_u32(w, xyz.data, 3); + data = vsetq_lane_s32(w, xyz.data, 3); #else - vec = xyz.vec; vec.w = w; + ints = xyz.ints; ints.w = w; #endif } @@ -113,16 +133,16 @@ struct [[nodiscard]] 
alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 /** * @brief Creates a new SIMD 4 component 32bit floating point vector structure. - * @param data target vector unsigned integer SIMD data + * @param v target vector unsigned integer SIMD data */ explicit simd_i32_4(simd_u32_4 v) { #if defined(MATH_SIMD_SUPPORT_SSE) - this->data = v.data; + data = v.data; #elif defined(MATH_SIMD_SUPPORT_NEON) - this->data = v.data; + data = vreinterpretq_s32_u32(v.data); #else - vec = v.vec; + ints = v.ints; #endif } @@ -137,7 +157,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.w }; #else - vec = v; + ints = v; #endif } /** @@ -151,7 +171,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.z }; #else - vec = int4(v, v.z); + ints = int4(v, v.z); #endif } /** @@ -165,7 +185,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.y, v.y }; #else - vec = int4(v, v.y, v.y); + ints = int4(v, v.y, v.y); #endif } /** @@ -179,7 +199,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vld1q_s32((const int32*)v); #else - vec = *v; + ints = *v; #endif } @@ -191,55 +211,55 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #if defined(MATH_SIMD_SUPPORT_SSE) return data; #elif defined(MATH_SIMD_SUPPORT_NEON) - return data; + return vreinterpretq_u32_s32(data); #else - return simd_u32_4((uint4)vec); + return simd_u32_4((uint4)ints); #endif } /******************************************************************************************************************* - * @brief Loads SIMD 4 component 32bit signed integer aligned vector value. + * @brief Loads SIMD 4 component 32bit signed integer aligned vector values. 
* @warning Specified vector pointer must be aligned in the memory!!! * @param[in] v target 4 component vector value pointer (aligned) */ - void loadAligned(const int4* v) noexcept + void loadAligned(const int32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_load_si128((const _simd_i128*)v); + data = _mm_load_si128((const __m128i*)v); #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vld1q_s32((const int32*)v); + data = vld1q_s32(v); #else - vec = *v; + ints = *((const int4*)v); #endif } /** - * @brief Stores SIMD 4 component 32bit signed integer unaligned vector value. + * @brief Stores SIMD 4 component 32bit signed integer unaligned vector values. * @param[out] v target 4 component vector value pointer (unaligned) */ - void store(int4* v) noexcept + void store(int32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - _mm_storeu_si128((_simd_i128*)v, data); + _mm_storeu_si128((__m128i*)v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) - vst1q_s32((int32*)v, data); + vst1q_s32(v, data); #else - *v = vec; + *((int4*)v) = ints; #endif } /** - * @brief Stores SIMD 4 component 32bit signed integer aligned vector value. + * @brief Stores SIMD 4 component 32bit signed integer aligned vector values. * @warning Specified vector pointer must be aligned in the memory!!! 
* @param[out] v target 4 component vector value pointer (aligned) */ - void storeAligned(int4* v) noexcept + void storeAligned(int32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - _mm_store_si128((_simd_i128*)v, data); + _mm_store_si128((__m128i*)v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) - vst1q_s32((int32*)v, data); + vst1q_s32(v, data); #else - *v = vec; + *((int4*)v) = ints; #endif } @@ -253,7 +273,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_s32(data, 0); #else - return vec.x; + return ints.x; #endif } /** @@ -264,7 +284,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_s32(data, 1); #else - return vec.y; + return ints.y; #endif } /** @@ -275,7 +295,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_s32(data, 2); #else - return vec.z; + return ints.z; #endif } /** @@ -286,7 +306,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_s32(data, 3); #else - return vec.w; + return ints.w; #endif } @@ -294,22 +314,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 * @brief Sets SIMD vector first component value. * @param value target X vector component value */ - void setX(int32 value) noexcept { vec.x = value; } + void setX(int32 value) noexcept { ints.x = value; } /** * @brief Sets SIMD vector second component value. * @param value target Y vector component value */ - void setY(int32 value) noexcept { vec.y = value; } + void setY(int32 value) noexcept { ints.y = value; } /** * @brief Sets SIMD vector third component value. 
* @param value target Z vector component value */ - void setZ(int32 value) noexcept { vec.z = value; } + void setZ(int32 value) noexcept { ints.z = value; } /** * @brief Sets SIMD vector fourth component value. * @param value target W vector component value */ - void setW(int32 value) noexcept { vec.w = value; } + void setW(int32 value) noexcept { ints.w = value; } /** * @brief Swizzles SIMD vector components. @@ -332,7 +352,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return __builtin_shufflevector(data, data, X, Y, Z, W); #else - return simd_i32_4(vec[X], vec[Y], vec[Z], vec[W]); + return simd_i32_4(ints[X], ints[Y], ints[Z], ints[W]); #endif } @@ -345,25 +365,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 * @brief Returns SIMD vector component by index. * @param i target component index */ - int32& operator[](psize i) noexcept { return vec[i]; } + int32& operator[](psize i) noexcept { return ints[i]; } /** * @brief Returns SIMD vector component by index. * @param i target component index */ - int32 operator[](psize i) const noexcept { return vec[i]; } + int32 operator[](psize i) const noexcept { return ints[i]; } /** * @brief Returns SIMD vector as 4 component signed integer vector. (xyzw) */ - explicit operator int4() const noexcept { return vec; } + explicit operator int4() const noexcept { return ints; } /** * @brief Returns SIMD vector as 3 component signed integer vector. (xyz) */ - explicit operator int3() const noexcept { return (int3)vec; } + explicit operator int3() const noexcept { return (int3)ints; } /** * @brief Returns SIMD vector as 2 component signed integer vector. (xy) */ - explicit operator int2() const noexcept { return (int2)vec; } + explicit operator int2() const noexcept { return (int2)ints; } /** * @brief Returns SIMD first vector component value. 
(x) */ @@ -377,7 +397,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vaddq_s32(data, v.data); #else - return simd_i32_4(vec + v.vec); + return simd_i32_4(ints + v.ints); #endif } simd_i32_4 operator-(simd_i32_4 v) const noexcept @@ -387,7 +407,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vsubq_s32(data, v.data); #else - return simd_i32_4(vec - v.vec); + return simd_i32_4(ints - v.ints); #endif } simd_i32_4 operator*(simd_i32_4 v) const noexcept @@ -397,11 +417,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vmulq_s32(data, v.data); #else - return simd_i32_4(vec * v.vec); + return simd_i32_4(ints * v.ints); #endif } - simd_i32_4 operator/(simd_i32_4 v) const noexcept { return simd_i32_4(vec / v.vec); } - simd_i32_4 operator%(simd_i32_4 v) const noexcept { return simd_i32_4(vec % v.vec); } + simd_i32_4 operator/(simd_i32_4 v) const noexcept { return simd_i32_4(ints / v.ints); } + simd_i32_4 operator%(simd_i32_4 v) const noexcept { return simd_i32_4(ints % v.ints); } //****************************************************************************************************************** simd_i32_4 operator&(simd_i32_4 v) const noexcept @@ -411,7 +431,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vandq_s32(data, v.data); #else - return simd_i32_4(vec & v.vec); + return simd_i32_4(ints & v.ints); #endif } simd_i32_4 operator|(simd_i32_4 v) const noexcept @@ -421,7 +441,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vorrq_s32(data, v.data); #else - return simd_i32_4(vec | v.vec); + return simd_i32_4(ints | v.ints); #endif } simd_i32_4 operator^(simd_i32_4 v) const noexcept @@ -431,7 +451,7 @@ struct [[nodiscard]] 
alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return veorq_s32(data, v.data); #else - return simd_i32_4(vec ^ v.vec); + return simd_i32_4(ints ^ v.ints); #endif } simd_i32_4 operator>>(simd_i32_4 v) const noexcept @@ -441,7 +461,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_s32(data, vnegq_s32(v.data)); #else - return simd_i32_4(vec >> v.vec); + return simd_i32_4(ints >> v.ints); #endif } simd_i32_4 operator<<(simd_i32_4 v) const noexcept @@ -451,7 +471,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_s32(data, v.data); #else - return simd_i32_4(vec << v.vec); + return simd_i32_4(ints << v.ints); #endif } simd_i32_4 operator>>(int32 n) const noexcept @@ -461,17 +481,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshrq_n_s32(data, n); #else - return simd_i32_4(vec >> n); + return simd_i32_4(ints >> n); #endif } simd_i32_4 operator<<(int32 n) const noexcept { - #if defined(MATH_SIMD_SUPPORT_AVX2) + #if defined(MATH_SIMD_SUPPORT_SSE) return _mm_slli_epi32(data, n); #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_n_s32(data, n); #else - return simd_i32_4(vec << n); + return simd_i32_4(ints << n); #endif } simd_i32_4 operator-() const noexcept @@ -481,7 +501,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vnegq_s32(data); #else - return simd_i32_4(-vec); + return simd_i32_4(-ints); #endif } simd_i32_4 operator!() const noexcept @@ -491,7 +511,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vandq_s32(vceqq_s32(data, vdupq_n_s32(0)), vdupq_n_s32(1)); #else - return simd_i32_4(!vec); + return simd_i32_4(!ints); #endif } simd_i32_4 operator~() const noexcept @@ -501,11 
+521,20 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vmvnq_s32(data); #else - return simd_i32_4(~vec); + return simd_i32_4(~ints); #endif } //****************************************************************************************************************** + simd_i32_4 operator+(int32 n) const noexcept { return *this + simd_i32_4(n); } + simd_i32_4 operator-(int32 n) const noexcept { return *this - simd_i32_4(n); } + simd_i32_4 operator*(int32 n) const noexcept { return *this * simd_i32_4(n); } + simd_i32_4 operator/(int32 n) const noexcept { return *this / simd_i32_4(n); } + simd_i32_4 operator%(int32 n) const noexcept { return *this % simd_i32_4(n); } + simd_i32_4 operator&(int32 n) const noexcept { return *this & simd_i32_4(n); } + simd_i32_4 operator|(int32 n) const noexcept { return *this | simd_i32_4(n); } + simd_i32_4 operator^(int32 n) const noexcept { return *this ^ simd_i32_4(n); } + simd_i32_4& operator+=(simd_i32_4 v) noexcept { *this = *this + v; return *this; } simd_i32_4& operator-=(simd_i32_4 v) noexcept { *this = *this - v; return *this; } simd_i32_4& operator*=(simd_i32_4 v) noexcept { *this = *this * v; return *this; } @@ -537,7 +566,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_s32(vceqq_s32(data, v.data)) == 0xFFFFFFFF; #else - return vec == v.vec; + return ints == v.ints; #endif } bool operator!=(simd_i32_4 v) const noexcept @@ -547,7 +576,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return !vminvq_s32(vceqq_s32(data, v.data)); #else - return vec != v.vec; + return ints != v.ints; #endif } simd_u32_4 operator<(simd_i32_4 v) const noexcept @@ -557,7 +586,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcltq_s32(data, v.data); #else - return simd_u32_4(vec < 
v.vec); + return simd_u32_4(ints < v.ints); #endif } simd_u32_4 operator>(simd_i32_4 v) const noexcept @@ -567,7 +596,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgtq_s32(data, v.data); #else - return simd_u32_4(vec > v.vec); + return simd_u32_4(ints > v.ints); #endif } simd_u32_4 operator<=(simd_i32_4 v) const noexcept @@ -577,7 +606,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcleq_s32(data, v.data); #else - return simd_u32_4(vec <= v.vec); + return simd_u32_4(ints <= v.ints); #endif } simd_u32_4 operator>=(simd_i32_4 v) const noexcept @@ -587,10 +616,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgeq_s32(data, v.data); #else - return simd_u32_4(vec >= v.vec); + return simd_u32_4(ints >= v.ints); #endif } + bool operator==(int32 n) const noexcept { return *this == simd_i32_4(n); } + bool operator!=(int32 n) const noexcept { return *this != simd_i32_4(n); } + simd_u32_4 operator<(int32 n) const noexcept { return *this < simd_i32_4(n); } + simd_u32_4 operator>(int32 n) const noexcept { return *this > simd_i32_4(n); } + simd_u32_4 operator<=(int32 n) const noexcept { return *this <= simd_i32_4(n); } + simd_u32_4 operator>=(int32 n) const noexcept { return *this >= simd_i32_4(n); } + static const simd_i32_4 zero, one, minusOne, min, max; }; @@ -617,22 +653,70 @@ static simd_u32_4 operator>(int32 n, simd_i32_4 v) noexcept { return simd_i32_4( static simd_u32_4 operator<=(int32 n, simd_i32_4 v) noexcept { return simd_i32_4(n) <= v; } static simd_u32_4 operator>=(int32 n, simd_i32_4 v) noexcept { return simd_i32_4(n) >= v; } +/** + * @brief Casts from the 4 component unsigned integer to signed integer SIMD vector. 
(Doesn't change the bits) + */ +static simd_i32_4 uintAsInt(simd_u32_4 v) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return v.data; + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vreinterpretq_s32_u32(v.data); // reinterpret uint32x4_t bits as int32x4_t + #else + return *((const simd_i32_4*)&v); + #endif +} + /*********************************************************************************************************************** * @brief Compares two SIMD vectors component wise if they are equal. * * @param a first SIMD vector to compare * @param b second SIMD vector to compare */ -static simd_i32_4 compare(simd_i32_4 a, simd_i32_4 b) noexcept +static simd_u32_4 equal(simd_i32_4 a, simd_i32_4 b) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) return _mm_cmpeq_epi32(a.data, b.data); #elif defined(MATH_SIMD_SUPPORT_NEON) return vceqq_s32(a.data, b.data); #else - return simd_i32_4(compare(a.vec, b.vec)); + return simd_u32_4(equal(a.ints, b.ints)); + #endif +} +/** + * @brief Compares two SIMD vectors component wise if they are not equal. + * + * @param a first SIMD vector to compare + * @param b second SIMD vector to compare + */ +static simd_u32_4 notEqual(simd_i32_4 a, simd_i32_4 b) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_xor_si128(_mm_cmpeq_epi32(a.data, b.data), _mm_set1_epi32(0xFFFFFFFF)); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vmvnq_u32(vceqq_s32(a.data, b.data)); + #else + return simd_u32_4(notEqual(a.ints, b.ints)); #endif } + +/** + * @brief SIMD vector bitwise bit clear component wise. [r = a & (~b)] + * + * @param a first SIMD vector + * @param b second SIMD vector + */ +static simd_i32_4 bic(simd_i32_4 a, simd_i32_4 b) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_andnot_si128(b.data, a.data); // flipped + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vbicq_s32(a.data, b.data); + #else + return simd_i32_4(a.ints & (~b.ints)); + #endif +} + /** * @brief Returns true if first SIMD vector binary representation is less than the second.
* @@ -660,7 +744,7 @@ static simd_i32_4 select(simd_u32_4 c, simd_i32_4 t, simd_i32_4 f) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vbslq_s32(vshrq_n_s32(vreinterpretq_s32_u32(c.data), 31), t.data, f.data); #else - return simd_i32_4(select(c.vec, t.vec, f.vec)); + return simd_i32_4(select(c.ints, t.ints, f.ints)); #endif } @@ -677,7 +761,7 @@ static simd_i32_4 min(simd_i32_4 a, simd_i32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vminq_s32(a.data, b.data); #else - return simd_i32_4(min(a.vec, b.vec)); + return simd_i32_4(min(a.ints, b.ints)); #endif } /** @@ -693,7 +777,7 @@ static simd_i32_4 max(simd_i32_4 a, simd_i32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vmaxq_s32(a.data, b.data); #else - return simd_i32_4(max(a.vec, b.vec)); + return simd_i32_4(max(a.ints, b.ints)); #endif } diff --git a/include/math/simd/vector/uint.hpp b/include/math/simd/vector/uint.hpp index 90da3ec..ca660c1 100644 --- a/include/math/simd/vector/uint.hpp +++ b/include/math/simd/vector/uint.hpp @@ -39,7 +39,9 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 union { _simd_u128 data; /** Packed SIMD 4 component 32bit unsigned integer vector data. 
*/ - uint4 vec; + uint4 uints; + int4 ints; + float4 floats; }; simd_u32_4(const simd_u32_4& v) = default; @@ -55,7 +57,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_u32(0); #else - vec = uint4::zero; + uints = uint4::zero; #endif } /** @@ -69,7 +71,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = vdupq_n_u32(xyzw); #else - vec = uint4(xyzw); + uints = uint4(xyzw); #endif } /** @@ -87,7 +89,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { x, y, z, w }; #else - vec = uint4(x, y, z, w); + uints = uint4(x, y, z, w); + #endif + } + /** + * @brief Creates a new SIMD 4 component 32bit unsigned integer vector structure. + * @warning This constructor duplicates Z component to the W component! + * + * @param x first vector component value + * @param y second vector component value + * @param z third vector component value + */ + simd_u32_4(uint32 x, uint32 y, uint32 z) noexcept + { + #if defined(MATH_SIMD_SUPPORT_SSE) + data = _mm_set_epi32((int)z, (int)z, (int)y, (int)x); + #elif defined(MATH_SIMD_SUPPORT_NEON) + data = { x, y, z, z }; + #else + uints = uint4(x, y, z, z); #endif } /** @@ -98,14 +118,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 */ simd_u32_4(simd_u32_4 xyz, uint32 w) noexcept { - #if defined(MATH_SIMD_SUPPORT_SSE4_1) + #if defined(MATH_SIMD_SUPPORT_AVX2) data = _mm_blend_epi32(xyz.data, _mm_set1_epi32((int)w), 8); #elif defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_set_epi32((int)w, (int)xyz.vec.z, (int)xyz.vec.y, (int)xyz.vec.x); + data = _mm_set_epi32((int)w, (int)xyz.uints.z, (int)xyz.uints.y, (int)xyz.uints.x); #elif defined(MATH_SIMD_SUPPORT_NEON) data = vsetq_lane_u32(w, xyz.data, 3); #else - vec = xyz.vec; vec.w = w; + uints = xyz.uints; uints.w = w; #endif } @@ -128,7 +148,7 @@ struct [[nodiscard]] 
alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.w }; #else - vec = v; + uints = v; #endif } /** @@ -142,7 +162,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.z, v.z }; #else - vec = uint4(v, v.z); + uints = uint4(v, v.z); #endif } /** @@ -156,7 +176,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) data = { v.x, v.y, v.y, v.y }; #else - vec = uint4(v, v.y, v.y); + uints = uint4(v, v.y, v.y); #endif } /** @@ -166,11 +186,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 explicit simd_u32_4(const uint4* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_loadu_si128((const _simd_u128*)v); + data = _mm_loadu_si128((const __m128i*)v); #elif defined(MATH_SIMD_SUPPORT_NEON) data = vld1q_u32((const uint32*)v); #else - vec = *v; + uints = *v; #endif } @@ -179,14 +199,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 * @warning Specified vector pointer must be aligned in the memory!!! * @param[in] v target 4 component vector value pointer (aligned) */ - void loadAligned(const uint4* v) noexcept + void loadAligned(const uint32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - data = _mm_load_si128((const _simd_u128*)v); + data = _mm_load_si128((const __m128i*)v); #elif defined(MATH_SIMD_SUPPORT_NEON) - data = vld1q_u32((const uint32*)v); + data = vld1q_u32(v); #else - vec = *v; + uints = *((const uint4*)v); #endif } @@ -194,14 +214,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 * @brief Stores SIMD 4 component 32bit unsigned integer unaligned vector value.
* @param[out] v target 4 component vector value pointer (unaligned) */ - void store(uint4* v) noexcept + void store(uint32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) - _mm_storeu_si128((_simd_u128*)v, data); + _mm_storeu_si128((__m128i*)v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) - vst1q_u32((uint32*)v, data); + vst1q_u32(v, data); #else - *v = vec; + *((uint4*)v) = uints; #endif } /** @@ -209,14 +229,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 * @warning Specified vector pointer must be aligned in the memory!!! * @param[out] v target 4 component vector value pointer (aligned) */ - void storeAligned(uint4* v) noexcept + void storeAligned(uint32* v) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) _mm_store_si128((_simd_u128*)v, data); #elif defined(MATH_SIMD_SUPPORT_NEON) vst1q_u32((uint32*)v, data); #else - *v = vec; + *((uint4*)v) = uints; #endif } @@ -230,7 +250,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_u32(data, 0); #else - return vec.x; + return uints.x; #endif } /** @@ -241,7 +261,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_u32(data, 1); #else - return vec.y; + return uints.y; #endif } /** @@ -252,7 +272,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_u32(data, 2); #else - return vec.z; + return uints.z; #endif } /** @@ -263,7 +283,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #if defined(MATH_SIMD_SUPPORT_NEON) return vgetq_lane_u32(data, 3); #else - return vec.w; + return uints.w; #endif } @@ -271,22 +291,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 * @brief Sets SIMD vector first component value. 
* @param value target X vector component value */ - void setX(uint32 value) noexcept { vec.x = value; } + void setX(uint32 value) noexcept { uints.x = value; } /** * @brief Sets SIMD vector second component value. * @param value target Y vector component value */ - void setY(uint32 value) noexcept { vec.y = value; } + void setY(uint32 value) noexcept { uints.y = value; } /** * @brief Sets SIMD vector third component value. * @param value target Z vector component value */ - void setZ(uint32 value) noexcept { vec.z = value; } + void setZ(uint32 value) noexcept { uints.z = value; } /** * @brief Sets SIMD vector fourth component value. * @param value target W vector component value */ - void setW(uint32 value) noexcept { vec.w = value; } + void setW(uint32 value) noexcept { uints.w = value; } /** * @brief Swizzles SIMD vector components. @@ -309,7 +329,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return __builtin_shufflevector(data, data, X, Y, Z, W); #else - return simd_u32_4(vec[X], vec[Y], vec[Z], vec[W]); + return simd_u32_4(uints[X], uints[Y], uints[Z], uints[W]); #endif } @@ -322,25 +342,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 * @brief Returns SIMD vector component by index. * @param i target component index */ - uint32& operator[](psize i) noexcept { return vec[i]; } + uint32& operator[](psize i) noexcept { return uints[i]; } /** * @brief Returns SIMD vector component by index. * @param i target component index */ - uint32 operator[](psize i) const noexcept { return vec[i]; } + uint32 operator[](psize i) const noexcept { return uints[i]; } /** * @brief Returns SIMD vector as 4 component unsigned integer vector. (xyzw) */ - explicit operator uint4() const noexcept { return vec; } + explicit operator uint4() const noexcept { return uints; } /** * @brief Returns SIMD vector as 3 component unsigned integer vector. 
(xyz) */ - explicit operator uint3() const noexcept { return (uint3)vec; } + explicit operator uint3() const noexcept { return (uint3)uints; } /** * @brief Returns SIMD vector as 2 component unsigned integer vector. (xy) */ - explicit operator uint2() const noexcept { return (uint2)vec; } + explicit operator uint2() const noexcept { return (uint2)uints; } /** * @brief Returns SIMD first vector component value. (x) */ @@ -354,7 +374,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vaddq_u32(data, v.data); #else - return simd_u32_4(vec + v.vec); + return simd_u32_4(uints + v.uints); #endif } simd_u32_4 operator-(simd_u32_4 v) const noexcept @@ -364,7 +384,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vsubq_u32(data, v.data); #else - return simd_u32_4(vec - v.vec); + return simd_u32_4(uints - v.uints); #endif } simd_u32_4 operator*(simd_u32_4 v) const noexcept @@ -374,11 +394,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vmulq_u32(data, v.data); #else - return simd_u32_4(vec * v.vec); + return simd_u32_4(uints * v.uints); #endif } - simd_u32_4 operator/(simd_u32_4 v) const noexcept { return simd_u32_4(vec / v.vec); } - simd_u32_4 operator%(simd_u32_4 v) const noexcept { return simd_u32_4(vec % v.vec); } + simd_u32_4 operator/(simd_u32_4 v) const noexcept { return simd_u32_4(uints / v.uints); } + simd_u32_4 operator%(simd_u32_4 v) const noexcept { return simd_u32_4(uints % v.uints); } //****************************************************************************************************************** simd_u32_4 operator&(simd_u32_4 v) const noexcept @@ -388,7 +408,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vandq_u32(data, v.data); #else - return simd_u32_4(vec & v.vec); + return 
simd_u32_4(uints & v.uints); #endif } simd_u32_4 operator|(simd_u32_4 v) const noexcept @@ -398,7 +418,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vorrq_u32(data, v.data); #else - return simd_u32_4(vec | v.vec); + return simd_u32_4(uints | v.uints); #endif } simd_u32_4 operator^(simd_u32_4 v) const noexcept @@ -408,7 +428,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return veorq_u32(data, v.data); #else - return simd_u32_4(vec ^ v.vec); + return simd_u32_4(uints ^ v.uints); #endif } simd_u32_4 operator>>(simd_u32_4 v) const noexcept @@ -418,7 +438,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_u32(data, vnegq_s32(vreinterpretq_s32_u32(v.data))); #else - return simd_u32_4(vec >> v.vec); + return simd_u32_4(uints >> v.uints); #endif } simd_u32_4 operator<<(simd_u32_4 v) const noexcept @@ -428,7 +448,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_u32(data, vreinterpretq_s32_u32(v.data)); #else - return simd_u32_4(vec << v.vec); + return simd_u32_4(uints << v.uints); #endif } simd_u32_4 operator>>(uint32 n) const noexcept @@ -438,17 +458,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vshrq_n_u32(data, n); #else - return simd_u32_4(vec >> n); + return simd_u32_4(uints >> n); #endif } simd_u32_4 operator<<(uint32 n) const noexcept { - #if defined(MATH_SIMD_SUPPORT_AVX2) + #if defined(MATH_SIMD_SUPPORT_SSE) return _mm_slli_epi32(data, n); #elif defined(MATH_SIMD_SUPPORT_NEON) return vshlq_n_u32(data, n); #else - return simd_u32_4(vec << n); + return simd_u32_4(uints << n); #endif } simd_u32_4 operator!() const noexcept @@ -458,7 +478,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif 
defined(MATH_SIMD_SUPPORT_NEON) return vandq_u32(vceqq_u32(data, vdupq_n_u32(0)), vdupq_n_u32(1)); #else - return simd_u32_4(!vec); + return simd_u32_4(!uints); #endif } simd_u32_4 operator~() const noexcept @@ -468,11 +488,20 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vmvnq_u32(data); #else - return simd_u32_4(~vec); + return simd_u32_4(~uints); #endif } //****************************************************************************************************************** + simd_u32_4 operator+(uint32 n) const noexcept { return *this + simd_u32_4(n); } + simd_u32_4 operator-(uint32 n) const noexcept { return *this - simd_u32_4(n); } + simd_u32_4 operator*(uint32 n) const noexcept { return *this * simd_u32_4(n); } + simd_u32_4 operator/(uint32 n) const noexcept { return *this / simd_u32_4(n); } + simd_u32_4 operator%(uint32 n) const noexcept { return *this % simd_u32_4(n); } + simd_u32_4 operator&(uint32 n) const noexcept { return *this & simd_u32_4(n); } + simd_u32_4 operator|(uint32 n) const noexcept { return *this | simd_u32_4(n); } + simd_u32_4 operator^(uint32 n) const noexcept { return *this ^ simd_u32_4(n); } + simd_u32_4& operator+=(simd_u32_4 v) noexcept { *this = *this + v; return *this; } simd_u32_4& operator-=(simd_u32_4 v) noexcept { *this = *this - v; return *this; } simd_u32_4& operator*=(simd_u32_4 v) noexcept { *this = *this * v; return *this; } @@ -504,7 +533,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_u32(vceqq_u32(data, v.data)) == 0xFFFFFFFFu; #else - return vec == v.vec; + return uints == v.uints; #endif } bool operator!=(simd_u32_4 v) const noexcept @@ -514,7 +543,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return !vminvq_u32(vceqq_u32(data, v.data)); #else - return vec != v.vec; + return uints != v.uints; #endif } simd_u32_4 
operator<(simd_u32_4 v) const noexcept @@ -524,7 +553,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcltq_u32(data, v.data); #else - return simd_u32_4(vec < v.vec); + return simd_u32_4(uints < v.uints); #endif } simd_u32_4 operator>(simd_u32_4 v) const noexcept @@ -534,7 +563,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgtq_u32(data, v.data); #else - return simd_u32_4(vec > v.vec); + return simd_u32_4(uints > v.uints); #endif } simd_u32_4 operator<=(simd_u32_4 v) const noexcept @@ -544,7 +573,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcleq_u32(data, v.data); #else - return simd_u32_4(vec <= v.vec); + return simd_u32_4(uints <= v.uints); #endif } simd_u32_4 operator>=(simd_u32_4 v) const noexcept @@ -554,10 +583,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4 #elif defined(MATH_SIMD_SUPPORT_NEON) return vcgeq_u32(data, v.data); #else - return simd_u32_4(vec >= v.vec); + return simd_u32_4(uints >= v.uints); #endif } + bool operator==(uint32 n) const noexcept { return *this == simd_u32_4(n); } + bool operator!=(uint32 n) const noexcept { return *this != simd_u32_4(n); } + simd_u32_4 operator<(uint32 n) const noexcept { return *this < simd_u32_4(n); } + simd_u32_4 operator>(uint32 n) const noexcept { return *this > simd_u32_4(n); } + simd_u32_4 operator<=(uint32 n) const noexcept { return *this <= simd_u32_4(n); } + simd_u32_4 operator>=(uint32 n) const noexcept { return *this >= simd_u32_4(n); } + static const simd_u32_4 zero, one, max; }; @@ -593,7 +629,7 @@ static int getTrues(simd_u32_4 v) noexcept int32x4_t shift = { 0, 1, 2, 3 }; return vaddvq_u32(vshlq_u32(vshrq_n_u32(v.data, 31), shift)); #else - return getTrues(v.vec); + return getTrues(v.uints); #endif } /** @@ -606,7 +642,7 @@ static bool areAllTrue(simd_u32_4 v) 
noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_u32(v.data) == 0xFFFFFFFFu; #else - return areAllTrue(v.vec); + return areAllTrue(v.uints); #endif } /** @@ -621,7 +657,7 @@ static bool areAllFalse(simd_u32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return !vmaxvq_u32(v.data); #else - return areAllFalse(v.vec); + return areAllFalse(v.uints); #endif } /** @@ -636,7 +672,7 @@ static bool areAnyTrue(simd_u32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vmaxvq_u32(v.data); #else - return areAnyTrue(v.vec); + return areAnyTrue(v.uints); #endif } /** @@ -649,7 +685,7 @@ static bool areAnyFalse(simd_u32_4 v) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vminvq_u32(v.data) != 0xFFFFFFFFu; #else - return areAnyFalse(v.vec); + return areAnyFalse(v.uints); #endif } @@ -659,16 +695,50 @@ static bool areAnyFalse(simd_u32_4 v) noexcept * @param a first SIMD vector to compare * @param b second SIMD vector to compare */ -static simd_u32_4 compare(simd_u32_4 a, simd_u32_4 b) noexcept +static simd_u32_4 equal(simd_u32_4 a, simd_u32_4 b) noexcept { #if defined(MATH_SIMD_SUPPORT_SSE) return _mm_cmpeq_epi32(a.data, b.data); #elif defined(MATH_SIMD_SUPPORT_NEON) return vceqq_u32(a.data, b.data); #else - return simd_u32_4(compare(a.vec, b.vec)); + return simd_u32_4(equal(a.uints, b.uints)); + #endif +} +/** + * @brief Compares two SIMD vectors component wise if they are not equal. + * + * @param a first SIMD vector to compare + * @param b second SIMD vector to compare + */ +static simd_u32_4 notEqual(simd_u32_4 a, simd_u32_4 b) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_xor_si128(_mm_cmpeq_epi32(a.data, b.data), _mm_set1_epi32(0xFFFFFFFF)); + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vmvnq_u32(vceqq_u32(a.data, b.data)); + #else + return simd_u32_4(notEqual(a.uints, b.uints)); #endif } + +/** + * @brief SIMD vector bitwise bit clear component wise. 
[r = a & (~b)] + * + * @param a first SIMD vector + * @param b second SIMD vector + */ +static simd_u32_4 bic(simd_u32_4 a, simd_u32_4 b) noexcept +{ + #if defined(MATH_SIMD_SUPPORT_SSE) + return _mm_andnot_si128(b.data, a.data); // flipped + #elif defined(MATH_SIMD_SUPPORT_NEON) + return vbicq_u32(a.data, b.data); + #else + return simd_u32_4(a.uints & (~b.uints)); + #endif +} + /** * @brief Returns true if first SIMD vector binary representation is less than the second. * @@ -697,7 +767,7 @@ static simd_u32_4 select(simd_u32_4 c, simd_u32_4 t, simd_u32_4 f) noexcept return vbslq_u32(vreinterpretq_u32_s32(vshrq_n_s32( vreinterpretq_s32_u32(c.data), 31)), t.data, f.data); #else - return simd_u32_4(select(c.vec, t.vec, f.vec)); + return simd_u32_4(select(c.uints, t.uints, f.uints)); #endif } @@ -714,7 +784,7 @@ static simd_u32_4 min(simd_u32_4 a, simd_u32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vminq_u32(a.data, b.data); #else - return simd_u32_4(min(a.vec, b.vec)); + return simd_u32_4(min(a.uints, b.uints)); #endif } /** @@ -730,7 +800,7 @@ static simd_u32_4 max(simd_u32_4 a, simd_u32_4 b) noexcept #elif defined(MATH_SIMD_SUPPORT_NEON) return vmaxq_u32(a.data, b.data); #else - return simd_u32_4(max(a.vec, b.vec)); + return simd_u32_4(max(a.uints, b.uints)); #endif } diff --git a/include/math/sphere.hpp b/include/math/sphere.hpp index 9b8a5c0..28a780c 100644 --- a/include/math/sphere.hpp +++ b/include/math/sphere.hpp @@ -30,17 +30,37 @@ namespace math */ struct [[nodiscard]] Sphere { - float3 position; - float radius; - +protected: + simd_f32_4 posRad; +public: /** * @brief Creates a new sphere structure. * * @param radius target sphere radius - * @param[in] position sphere position in 3D space + * @param position sphere position in 3D space + */ + Sphere(float radius = 0.0f, simd_f32_4 position = simd_f32_4::zero) noexcept : + posRad(position) { posRad.setW(radius); } + + /** + * @brief Returns sphere radius. 
+ */ + float getRadius() const noexcept { return posRad.getW(); } + /** + * @brief Sets sphere radius. + * @param radius target sphere radius + */ + void setRadius(float radius) noexcept { return posRad.setW(radius); } + + /** + * @brief Returns position of the sphere in 3D space. + */ + const simd_f32_4& getPosition() const noexcept { return posRad; } + /** + * @brief Sets position of the sphere in 3D space. + * @param position target sphere position in 3D space */ - constexpr Sphere(float radius = 0.0f, const float3& position = float3::zero) noexcept : - position(position), radius(radius) { } + void setPosition(simd_f32_4 position) noexcept { posRad = simd_f32_4(position, posRad.getW()); } static const Sphere one, two, half; }; @@ -50,27 +70,28 @@ inline const Sphere Sphere::two = Sphere(1.0f); inline const Sphere Sphere::half = Sphere(0.25f); /** - * @brief Returns true if point is inside the sphere. + * @brief Returns true if point is inside the sphere in 3D space. * * @param[in] sphere target sphere to check - * @param[in] point target point in the space + * @param point target point in 3D space */ -static constexpr bool isInside(const Sphere& sphere, const float3& point) noexcept +static bool isInside(const Sphere& sphere, simd_f32_4 point) noexcept { - auto difference = sphere.position - point; - return length2(difference) < sphere.radius * sphere.radius; + auto difference = sphere.getPosition() - point; + auto radius = sphere.getRadius(); + return lengthSq3(difference) < radius * radius; } /** - * @brief Calculates closest point on sphere to the specified one. + * @brief Calculates closest point on sphere to the specified one in 3D space. 
* * @param[in] sphere target sphere to use - * @param[in] point target point in 3D space + * @param point target point in 3D space */ -static float3 closestPoint(const Sphere& sphere, const float3& point) noexcept +static simd_f32_4 closestPoint(const Sphere& sphere, simd_f32_4 point) noexcept { - auto sphereToPoint = normalize(point - sphere.position); - return sphere.position + sphereToPoint * sphere.radius; + auto sphereToPoint = normalize3(point - sphere.getPosition()); + return sphere.getPosition() + sphereToPoint * sphere.getRadius(); } /** @@ -79,11 +100,11 @@ static float3 closestPoint(const Sphere& sphere, const float3& point) noexcept * @param[in] a first sphere to check * @param[in] b second sphere to check */ -static constexpr bool isIntersected(const Sphere& a, const Sphere& b) noexcept +static bool isIntersected(const Sphere& a, const Sphere& b) noexcept { - auto d = a.position - b.position; - auto s = a.radius + b.radius; - return length2(d) <= s * s; + auto d = a.getPosition() - b.getPosition(); + auto s = a.getRadius() + b.getRadius(); + return lengthSq3(d) <= s * s; } /** * @brief Returns true if sphere intersects AABB. @@ -93,9 +114,9 @@ static constexpr bool isIntersected(const Sphere& a, const Sphere& b) noexcept */ static bool isIntersected(const Sphere& a, const Aabb& b) noexcept { - auto c = closestPoint(b, a.position); - auto d2 = length2(a.position - c); - return d2 < a.radius * a.radius; + auto c = closestPoint(b, a.getPosition()); + auto d2 = lengthSq3(a.getPosition() - c), radius = a.getRadius(); + return d2 < radius * radius; } } // namespace math \ No newline at end of file diff --git a/include/math/tone-mapping.hpp b/include/math/tone-mapping.hpp index 078663f..5575d95 100644 --- a/include/math/tone-mapping.hpp +++ b/include/math/tone-mapping.hpp @@ -34,28 +34,28 @@ namespace math constexpr float defaultGamma = 2.2f; /** - * @brief Applies gamma correction to the specified color. 
+ * @brief Applies gamma correction to the specified color. (Fast approximation) * * @details * Gamma correction is used to adjust the brightness of an image or display to match the * nonlinear response of display devices, such as monitors. It compensates for the fact that * displays do not linearly represent the light intensity of a color. * - * @param[in] color target color to gamma correct + * @param color target color to gamma correct * @param invGamma inverse gamma correction value (1.0/x) */ -static float3 gammaCorrection(const float3& color, float invGamma) noexcept +static simd_f32_4 fastGammaCorrection(simd_f32_4 color, float invGamma) noexcept { - return pow(color, float3(invGamma)); + return simd_f32_4(fastPow(color, simd_f32_4(invGamma)), color.getW()); } /** - * @brief Applies gamma correction to the specified color. + * @brief Applies gamma correction to the specified color. (Fast approximation) * @details See the @ref gammaCorrection(). - * @param[in] color target color to gamma correct + * @param color target color to gamma correct */ -static float3 gammaCorrection(const float3& color) noexcept +static simd_f32_4 fastGammaCorrection(simd_f32_4 color) noexcept { - return pow(color, float3(1.0f / defaultGamma)); + return simd_f32_4(fastPow(color, simd_f32_4(1.0f / defaultGamma)), color.getW()); } } // namespace math \ No newline at end of file diff --git a/include/math/triangle.hpp b/include/math/triangle.hpp index 7a44d89..1d32bf3 100644 --- a/include/math/triangle.hpp +++ b/include/math/triangle.hpp @@ -40,7 +40,7 @@ struct [[nodiscard]] Triangle simd_f32_4 p2 = simd_f32_4::zero; /**< Third triangle vertex position. */ /** - * @brief Creates a new triangle (polygon) structure. + * @brief Creates a new triangle (polygon) structure. 
(In 3D space) * * @param p0 first triangle vertex position in 3D space * @param p1 second triangle vertex position in 3D space @@ -63,10 +63,10 @@ struct [[nodiscard]] Triangle }; /** - * @brief Returns true if point is inside the triangle. + * @brief Returns true if point is inside the triangle in 3D space. * * @param[in] sphere target triangle to check - * @param[in] point target point in the space + * @param point target point in 3D space */ static bool isInside(const Triangle& triangle, simd_f32_4 point) noexcept { @@ -78,11 +78,11 @@ static bool isInside(const Triangle& triangle, simd_f32_4 point) noexcept } /** - * @brief Returns triangle barycentric. + * @brief Returns triangle barycentric in 3D space. * @details Way of expressing the position of a point relative to a triangle. * * @param[in] triangle target triangle to use - * @param[in] point target point in space + * @param point target point in 3D space */ static simd_f32_4 calcBarycentric(const Triangle& triangle, simd_f32_4 point) noexcept { @@ -91,7 +91,7 @@ static simd_f32_4 calcBarycentric(const Triangle& triangle, simd_f32_4 point) no auto v0 = triangle.p1 - p0, v1 = triangle.p2 - p0, v2 = point - p0; auto d00 = dot3(v0, v0), d01 = dot3(v0, v1), d11 = dot3(v1, v1); auto d20 = dot3(v2, v0), d21 = dot3(v2, v1); - auto invDenom = 1.0f / (d00 * d11 - d01 * d01); // TODO: maybe use vectors for these calculations? 
+ auto invDenom = 1.0f / (d00 * d11 - d01 * d01); auto v = (d11 * d20 - d01 * d21) * invDenom; auto w = (d00 * d21 - d01 * d20) * invDenom; return simd_f32_4(1.0f - v - w, v, w, 0.0f); diff --git a/include/math/vector/float.hpp b/include/math/vector/float.hpp index 6d62355..266a60f 100644 --- a/include/math/vector/float.hpp +++ b/include/math/vector/float.hpp @@ -535,10 +535,21 @@ static string toString(float2 v) { return to_string(v.x) + " " + to_string(v.y); * @param a first vector to compare * @param b second vector to compare */ -static uint2 compare(float2 a, float4 b) noexcept +static uint2 equal(float2 a, float4 b) noexcept { return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint2 notEqual(float2 a, float4 b) noexcept +{ + return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -708,7 +719,9 @@ static constexpr bool isClose(float2 a, float2 b, float maxDistSq = 1.0e-12f) no static float2 normalize(float2 v) noexcept { return v * (1.0f / length(v)); } /** * @brief Returns true if vector is normalized with specified tolerance. + * * @param v target vector to check + * @param tolerance floating point precision tolerance */ static bool isNormalized(float2 v, float tolerance = 1.0e-6f) noexcept { @@ -787,10 +800,21 @@ static string toString(float3 v) { return to_string(v.x) + " " + to_string(v.y) * @param a first vector to compare * @param b second vector to compare */ -static uint3 compare(float3 a, float3 b) noexcept +static uint3 equal(float3 a, float3 b) noexcept { return uint3(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. 
+ * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint3 notEqual(float3 a, float3 b) noexcept +{ + return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -979,7 +1003,9 @@ static constexpr bool isClose(float3 a, float3 b, float maxDistSq = 1.0e-12f) no static float3 normalize(float3 v) noexcept { return v * (1.0f / length(v)); } /** * @brief Returns true if vector is normalized with specified tolerance. + * * @param v target vector to check + * @param tolerance floating point precision tolerance */ static bool isNormalized(float3 v, float tolerance = 1.0e-6f) noexcept { @@ -1064,11 +1090,23 @@ static string toString(float4 v) * @param a first vector to compare * @param b second vector to compare */ -static uint4 compare(float4 a, float4 b) noexcept +static uint4 equal(float4 a, float4 b) noexcept { return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint4 notEqual(float4 a, float4 b) noexcept +{ + return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, + a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -1266,7 +1304,9 @@ static constexpr bool isClose(float4 a, float4 b, float maxDistSq = 1.0e-12f) no static float4 normalize(float4 v) noexcept { return v * (1.0f / length(v)); } /** * @brief Returns true if vector is normalized with specified tolerance. 
+ * * @param v target vector to check + * @param tolerance floating point precision tolerance */ static bool isNormalized(float4 v, float tolerance = 1.0e-6f) noexcept { diff --git a/include/math/vector/int.hpp b/include/math/vector/int.hpp index ce7748a..5f60ace 100644 --- a/include/math/vector/int.hpp +++ b/include/math/vector/int.hpp @@ -566,10 +566,21 @@ static string toString(int2 v) { return to_string(v.x) + " " + to_string(v.y); } * @param a first vector to compare * @param b second vector to compare */ -static uint2 compare(int2 a, int2 b) noexcept +static uint2 equal(int2 a, int2 b) noexcept { return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint2 notEqual(int2 a, int2 b) noexcept +{ + return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -663,10 +674,21 @@ static string toString(int3 v) { return to_string(v.x) + " " + to_string(v.y) + * @param a first vector to compare * @param b second vector to compare */ -static uint3 compare(int3 a, int3 b) noexcept +static uint3 equal(int3 a, int3 b) noexcept { return uint3(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint3 notEqual(int3 a, int3 b) noexcept +{ + return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. 
* @@ -769,11 +791,23 @@ static string toString(int4 v) * @param a first vector to compare * @param b second vector to compare */ -static uint4 compare(int4 a, int4 b) noexcept +static uint4 equal(int4 a, int4 b) noexcept { return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint4 notEqual(int4 a, int4 b) noexcept +{ + return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, + a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * diff --git a/include/math/vector/uint.hpp b/include/math/vector/uint.hpp index 5dfd2b3..7032ee5 100644 --- a/include/math/vector/uint.hpp +++ b/include/math/vector/uint.hpp @@ -539,10 +539,21 @@ static constexpr bool areAnyFalse(uint2 v) noexcept { return (v.x & v.y) != UINT * @param a first vector to compare * @param b second vector to compare */ -static uint2 compare(uint2 a, uint2 b) noexcept +static uint2 equal(uint2 a, uint2 b) noexcept { return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint2 notEqual(uint2 a, uint2 b) noexcept +{ + return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -666,10 +677,21 @@ static constexpr bool areAnyFalse(uint3 v) noexcept { return (v.x & v.y & v.z) ! * @param a first vector to compare * @param b second vector to compare */ -static uint3 compare(uint3 a, uint3 b) noexcept +static uint3 equal(uint3 a, uint3 b) noexcept { return uint3(a.x == b.x ? 
UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint3 notEqual(uint3 a, uint3 b) noexcept +{ + return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. * @@ -796,11 +818,23 @@ static constexpr bool areAnyFalse(uint4 v) noexcept { return (v.x & v.y & v.z & * @param a first vector to compare * @param b second vector to compare */ -static uint4 compare(uint4 a, uint4 b) noexcept +static uint4 equal(uint4 a, uint4 b) noexcept { return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0); } +/** + * @brief Compares two vectors component wise if they are not equal. + * + * @param a first vector to compare + * @param b second vector to compare + */ +static uint4 notEqual(uint4 a, uint4 b) noexcept +{ + return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, + a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0); +} + /** * @brief Returns true if first vector binary representation is less than the second. 
* diff --git a/source/aabb.cpp b/source/aabb.cpp index b00676d..0f6736b 100644 --- a/source/aabb.cpp +++ b/source/aabb.cpp @@ -15,191 +15,79 @@ #include "math/aabb.hpp" //********************************************************************************************************************** -math::float2 math::raycast2(const Aabb& aabb, const Ray& ray) noexcept -{/* - auto origin = ray.origin; - auto invDirection = float3::one / ray.getDirection(); - auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax(); - - auto t1 = (aabbMin.x - origin.x) * invDirection.x; - auto t2 = (aabbMax.x - origin.x) * invDirection.x; - auto t3 = (aabbMin.y - origin.y) * invDirection.y; - auto t4 = (aabbMax.y - origin.y) * invDirection.y; - auto t5 = (aabbMin.z - origin.z) * invDirection.z; - auto t6 = (aabbMax.z - origin.z) * invDirection.z; - - auto tMin = std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6)); - auto tMax = std::min(std::min(std::max(t1, t2), std::max(t3, t4)), std::max(t5, t6)); - return float2(tMin, tMax); - */ - return float2::zero; -} -math::float2 math::raycast2I(const Aabb& aabb, const Ray& ray) noexcept -{ - /* - auto origin = ray.origin; - auto invDirection = ray.getDirection(); - auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax(); - - auto t1 = (aabbMin.x - origin.x) * invDirection.x; - auto t2 = (aabbMax.x - origin.x) * invDirection.x; - auto t3 = (aabbMin.y - origin.y) * invDirection.y; - auto t4 = (aabbMax.y - origin.y) * invDirection.y; - auto t5 = (aabbMin.z - origin.z) * invDirection.z; - auto t6 = (aabbMax.z - origin.z) * invDirection.z; - - auto tMin = std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6)); - auto tMax = std::min(std::min(std::max(t1, t2), std::max(t3, t4)), std::max(t5, t6)); - return float2(tMin, tMax); - */ - return float2::zero; -} - -float math::raycast1(const Aabb& aabb, const Ray& ray) noexcept -{ - /* - auto origin = ray.origin; - auto invDirection = float3::one / ray.getDirection(); - auto 
aabbMin = aabb.getMin(), aabbMax = aabb.getMax(); - - auto t1 = (aabbMin.x - origin.x) * invDirection.x; - auto t2 = (aabbMax.x - origin.x) * invDirection.x; - auto t3 = (aabbMin.y - origin.y) * invDirection.y; - auto t4 = (aabbMax.y - origin.y) * invDirection.y; - auto t5 = (aabbMin.z - origin.z) * invDirection.z; - auto t6 = (aabbMax.z - origin.z) * invDirection.z; - - return std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6)); - */ - return {}; -} -float math::raycast1I(const Aabb& aabb, const Ray& ray) noexcept -{ - /* - auto origin = ray.origin; - auto invDirection = ray.getDirection(); - auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax(); - - auto t1 = (aabbMin.x - origin.x) * invDirection.x; - auto t2 = (aabbMax.x - origin.x) * invDirection.x; - auto t3 = (aabbMin.y - origin.y) * invDirection.y; - auto t4 = (aabbMax.y - origin.y) * invDirection.y; - auto t5 = (aabbMin.z - origin.z) * invDirection.z; - auto t6 = (aabbMax.z - origin.z) * invDirection.z; - - return std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6)); - */ - return {}; -} - -//********************************************************************************************************************** -bool math::isAabbIntersected(const float3& center, float3 extent, const Triangle& triangle) noexcept +bool math::isAabbIntersected(simd_f32_4 position, simd_f32_4 extent, const Triangle& triangle) noexcept { - /* - auto v0 = triangle.p0 - center; - auto v1 = triangle.p1 - center; - auto v2 = triangle.p2 - center; - auto f0 = v1 - v0, f1 = v2 - v1, f2 = v0 - v2; - - auto a = float3(0.0f, -f0.z, f0.y); - auto p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - auto r = extent.y * std::abs(f0.z) + extent.z * std::abs(f0.y); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) - return false; - - a = float3(0.0f, -f1.z, f1.y); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.y * std::abs(f1.z) + extent.z * std::abs(f1.y); - if (std::max(-max(p0, 
p1, p2), min(p0, p1, p2)) > r) + auto v = simd_f32_4x4(triangle.p0 - position, + triangle.p1 - position, triangle.p2 - position, simd_f32_4::zero); + auto f0 = v.c1 - v.c0, f1 = v.c2 - v.c1, f2 = v.c0 - v.c2; + auto f0X = f0.getX(), f0Y = f0.getY(), f0Z = f0.getZ(); + auto f1X = f1.getX(), f1Y = f1.getY(), f1Z = f1.getZ(); + auto f2X = f2.getX(), f2Y = f2.getY(), f2Z = f2.getZ(); + auto extX = extent.getX(), extY = extent.getY(), extZ = extent.getZ(); + + auto a = simd_f32_4(0.0f, -f0Z, f0Y); + auto p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + auto r = extY * std::abs(f0Z) + extZ * std::abs(f0Y); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(0.0f, -f2.z, f2.y); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.y * std::abs(f2.z) + extent.z * std::abs(f2.y); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(0.0f, -f1Z, f1Y); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extY * std::abs(f1Z) + extZ * std::abs(f1Y); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(f0.z, 0.0f, -f0.x); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * std::abs(f0.z) + extent.z * std::abs(f0.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(0.0f, -f2Z, f2Y); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extY * std::abs(f2Z) + extZ * std::abs(f2Y); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(f1.z, 0.0f, -f1.x); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * std::abs(f1.z) + extent.z * std::abs(f1.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(f0Z, 0.0f, -f0X); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f0Z) + extZ * std::abs(f0X); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(f2.z, 0.0f, -f2.x); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * 
std::abs(f2.z) + extent.z * std::abs(f2.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(f1Z, 0.0f, -f1X); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f1Z) + extZ * std::abs(f1X); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(-f0.y, f0.x, 0.0f); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * std::abs(f0.y) + extent.y * std::abs(f0.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(f2Z, 0.0f, -f2X); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f2Z) + extZ * std::abs(f2X); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(-f1.y, f1.x, 0.0f); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * std::abs(f1.y) + extent.y * std::abs(f1.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(-f0Y, f0X, 0.0f); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f0Y) + extY * std::abs(f0X); + if (std::max(-max3(p), min3(p)) > r) return false; - a = float3(-f2.y, f2.x, 0.0f); - p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a); - r = extent.x * std::abs(f2.y) + extent.y * std::abs(f2.x); - if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r) + a = simd_f32_4(-f1Y, f1X, 0.0f); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f1Y) + extY * std::abs(f1X); + if (std::max(-max3(p), min3(p)) > r) return false; - if (max(v0.x, v1.x, v2.x) < -extent.x || min(v0.x, v1.x, v2.x) > extent.x) - return false; - if (max(v0.y, v1.y, v2.y) < -extent.y || min(v0.y, v1.y, v2.y) > extent.y) + a = simd_f32_4(-f2Y, f2X, 0.0f); + p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a)); + r = extX * std::abs(f2Y) + extY * std::abs(f2X); + if (std::max(-max3(p), min3(p)) > r) return false; - if (max(v0.z, v1.z, v2.z) < -extent.z || min(v0.z, v1.z, v2.z) > extent.z) - return false; - - auto normal = 
cross(f0, f1); - auto distance = std::abs(dot(normal, v0)); - r = extent.x * std::abs(normal.x) + extent.y * std::abs(normal.y) + extent.z * std::abs(normal.z); - return distance <= r; - */ - return {}; -} -//********************************************************************************************************************** -bool math::isBehindFrustum(const Aabb& aabb, const float4x4& model, const Plane* planes, uint8 planeCount) noexcept -{ - /* - auto min = aabb.getMin(), max = aabb.getMax(); - auto v0 = model * float4(min, 1.0f); - auto v1 = model * float4(min.x, min.y, max.z, 1.0f); - auto v2 = model * float4(min.x, max.y, min.z, 1.0f); - auto v3 = model * float4(min.x, max.y, max.z, 1.0f); - auto v4 = model * float4(max.x, min.y, min.z, 1.0f); - auto v5 = model * float4(max.x, min.y, max.z, 1.0f); - auto v6 = model * float4(max.x, max.y, min.z, 1.0f); - auto v7 = model * float4(max, 1.0f); - - for (uint8 i = 0; i < planeCount; i++) + auto t = transpose3x3(v); + if (max3(t.c0) < -extX || min3(t.c0) > extX || max3(t.c1) < -extY || + min3(t.c1) > extY || max3(t.c2) < -extZ || min3(t.c2) > extZ) { - auto plane = planes[i]; - auto distance0 = distance(plane, v0), distance1 = distance(plane, v1); - auto distance2 = distance(plane, v2), distance3 = distance(plane, v3); - auto distance4 = distance(plane, v4), distance5 = distance(plane, v5); - auto distance6 = distance(plane, v6), distance7 = distance(plane, v7); - - if (distance0 < 0.0f && distance1 < 0.0f && distance2 < 0.0f && distance3 < 0.0f && - distance4 < 0.0f && distance5 < 0.0f && distance6 < 0.0f && distance7 < 0.0f) - { - return true; - } + return false; } - return false; - */ - return {}; + auto normal = cross3(f0, f1); + auto distance = std::abs(dot3(normal, v.c0)); + r = extX * std::abs(normal.getX()) + extY * std::abs(normal.getY()) + extZ * std::abs(normal.getZ()); + return distance <= r; } \ No newline at end of file diff --git a/source/bvh.cpp b/source/bvh.cpp index 327b60f..b982637 100644 --- 
a/source/bvh.cpp +++ b/source/bvh.cpp @@ -20,39 +20,40 @@ #include #include +// TODO: Utilize Jolt BVH building and update optimizations. + using namespace math; //********************************************************************************************************************** static Aabb calculateNodeAabb(const Bvh::Node* node, const uint8* vertices, const uint8* indices, const uint32* primitives, uint32 vertexSize, uint32 indexSize, const function& getIndex) { - auto lastPrimitive = node->leaf.firstPrimitive + node->base.primitiveCount; - float3 min = float3(FLT_MAX), max = float3(-FLT_MAX); - for (uint32 i = node->leaf.firstPrimitive; i < lastPrimitive; i++) + auto lastPrimitive = node->getFirstPrimitive() + node->getPrimitiveCount(); + simd_f32_4 min = simd_f32_4::max, max = simd_f32_4::minusMax; + for (uint32 i = node->getFirstPrimitive(); i < lastPrimitive; i++) { auto offset = primitives[i] * Triangle::pointCount; auto i0 = getIndex(indices + (offset ) * indexSize); auto i1 = getIndex(indices + (offset + 1) * indexSize); auto i2 = getIndex(indices + (offset + 2) * indexSize); - auto v0 = *(const float3*)(vertices + i0 * vertexSize); - auto v1 = *(const float3*)(vertices + i1 * vertexSize); - auto v2 = *(const float3*)(vertices + i2 * vertexSize); - min = ::min(min, v0); max = ::max(max, v0); - min = ::min(min, v1); max = ::max(max, v1); - min = ::min(min, v2); max = ::max(max, v2); + auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize)); + auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize)); + auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize)); + min = math::min(math::min(math::min(min, v0), v1), v2); + max = math::max(math::max(math::max(max, v0), v1), v2); } return Aabb(min, max); } static Aabb calculateNodeAabb(const Bvh::Node* node, const Aabb* aabbs, const uint32* primitives) { - auto lastPrimitive = node->leaf.firstPrimitive + node->base.primitiveCount; - float3 min = float3(FLT_MAX), max = 
float3(-FLT_MAX); - for (uint32 i = node->leaf.firstPrimitive; i < lastPrimitive; i++) + auto lastPrimitive = node->getFirstPrimitive() + node->getPrimitiveCount(); + simd_f32_4 min = simd_f32_4::max, max = simd_f32_4::minusMax; + for (uint32 i = node->getFirstPrimitive(); i < lastPrimitive; i++) { auto aabb = aabbs[primitives[i]]; - min = ::min(min, aabb.getMin()); - max = ::max(max, aabb.getMax()); + min = math::min(min, aabb.getMin()); + max = math::max(max, aabb.getMax()); } return Aabb(min, max); } @@ -60,14 +61,23 @@ static Aabb calculateNodeAabb(const Bvh::Node* node, const Aabb* aabbs, const ui //********************************************************************************************************************** #define BIN_COUNT 8 +namespace +{ + struct Bin final + { + simd_f32_4 min = simd_f32_4::max; + simd_f32_4 max = simd_f32_4::minusMax; + }; +} + static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint32 vertexSize, uint32 indexSize, - const uint8* vertices, const uint8* indices, const uint32* primitives, const float3* centroids, + const uint8* vertices, const uint8* indices, const uint32* primitives, const simd_f32_4* centroids, const function& getIndex, int32& axis, float& split, float& cost) { auto bestAxis = 0; auto bestSplit = 0.0f, bestCost = FLT_MAX; - for (uint8 a = 0; a < 3; a++) + for (int a = 0; a < 3; a++) { auto boundsMin = FLT_MAX, boundsMax = -FLT_MAX; for (uint32 i = firstPrimitive; i < lastPrimitive; i++) @@ -80,14 +90,9 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint if (boundsMin == boundsMax) continue; - struct Bin - { - float3 min = float3(FLT_MAX); - float3 max = float3(-FLT_MAX); - int32 primitiveCount = 0; - } bins[BIN_COUNT] = {}; - + Bin bins[BIN_COUNT] = {}; auto scale = BIN_COUNT / (boundsMax - boundsMin); + for (uint32 i = firstPrimitive; i < lastPrimitive; i++) { auto index = primitives[i]; @@ -95,33 +100,32 @@ static void findBestSplitPlane(uint32 
firstPrimitive, uint32 lastPrimitive, uint auto i0 = getIndex(indices + (offset ) * indexSize); auto i1 = getIndex(indices + (offset + 1) * indexSize); auto i2 = getIndex(indices + (offset + 2) * indexSize); - auto v0 = *(const float3*)(vertices + i0 * vertexSize); - auto v1 = *(const float3*)(vertices + i1 * vertexSize); - auto v2 = *(const float3*)(vertices + i2 * vertexSize); + auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize)); + auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize)); + auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize)); auto binIndex = std::min((int32)(BIN_COUNT - 1), (int32)(( centroids[index][a] - boundsMin) * scale)); auto& bin = bins[binIndex]; - bin.min = min(bin.min, v0); bin.max = max(bin.max, v0); - bin.min = min(bin.min, v1); bin.max = max(bin.max, v1); - bin.min = min(bin.min, v2); bin.max = max(bin.max, v2); - bin.primitiveCount++; + bin.min = min(min(min(bin.min, v0), v1), v2); + bin.max = max(max(max(bin.max, v0), v1), v2); + bin.max.ints.w++; } float leftArea[BIN_COUNT - 1], rightArea[BIN_COUNT - 1]; int32 leftCount[BIN_COUNT - 1], rightCount[BIN_COUNT - 1]; - float3 leftMin = float3(FLT_MAX), leftMax = float3(-FLT_MAX); - float3 rightMin = float3(FLT_MAX), rightMax = float3(-FLT_MAX); + simd_f32_4 leftMin = simd_f32_4::max, leftMax = simd_f32_4::minusMax; + simd_f32_4 rightMin = simd_f32_4::max, rightMax = simd_f32_4::minusMax; int32 leftSum = 0, rightSum = 0; - for (uint8 i = 0; i < BIN_COUNT - 1; i++) + for (int i = 0; i < BIN_COUNT - 1; i++) { - leftSum += bins[i].primitiveCount; + leftSum += bins[i].max.ints.w; leftCount[i] = leftSum; leftMin = min(leftMin, bins[i].min); leftMax = max(leftMax, bins[i].max); leftArea[i] = Aabb(leftMin, leftMax).calcArea(); - rightSum += bins[(BIN_COUNT - 1) - i].primitiveCount; + rightSum += bins[(BIN_COUNT - 1) - i].max.ints.w; rightCount[(BIN_COUNT - 2) - i] = rightSum; rightMin = min(rightMin, bins[(BIN_COUNT - 1) - i].min); rightMax = 
max(rightMax, bins[(BIN_COUNT - 1) - i].max); @@ -129,7 +133,7 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint } scale = (boundsMax - boundsMin) / BIN_COUNT; - for (uint8 i = 0; i < BIN_COUNT - 1; i++) + for (int i = 0; i < BIN_COUNT - 1; i++) { float planeCost = leftCount[i] * leftArea[i] + rightCount[i] * rightArea[i]; @@ -149,12 +153,12 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint //********************************************************************************************************************** static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, const Aabb* aabbs, - const uint32* primitives, const float3* centroids, int32& axis, float& split, float& cost) + const uint32* primitives, const simd_f32_4* centroids, int32& axis, float& split, float& cost) { auto bestAxis = 0; auto bestSplit = 0.0f, bestCost = FLT_MAX; - for (uint8 a = 0; a < 3; a++) + for (int a = 0; a < 3; a++) { auto boundsMin = FLT_MAX, boundsMax = -FLT_MAX; for (uint32 i = firstPrimitive; i < lastPrimitive; i++) @@ -167,14 +171,9 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons if (boundsMin == boundsMax) continue; - struct Bin - { - float3 min = float3(FLT_MAX); - float3 max = float3(-FLT_MAX); - int32 primitiveCount = 0; - } bins[BIN_COUNT] = {}; - + Bin bins[BIN_COUNT] = {}; auto scale = BIN_COUNT / (boundsMax - boundsMin); + for (uint32 i = firstPrimitive; i < lastPrimitive; i++) { auto index = primitives[i]; @@ -184,24 +183,24 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons auto& bin = bins[binIndex]; bin.min = min(bin.min, aabb.getMin()); bin.max = max(bin.max, aabb.getMax()); - bin.primitiveCount++; + bin.max.ints.w++; } // TODO: share this function part with a previous one? 
float leftArea[BIN_COUNT - 1], rightArea[BIN_COUNT - 1]; int32 leftCount[BIN_COUNT - 1], rightCount[BIN_COUNT - 1]; - float3 leftMin = float3(FLT_MAX), leftMax = float3(-FLT_MAX); - float3 rightMin = float3(FLT_MAX), rightMax = float3(-FLT_MAX); + simd_f32_4 leftMin = simd_f32_4::max, leftMax = simd_f32_4::minusMax; + simd_f32_4 rightMin = simd_f32_4::max, rightMax = simd_f32_4::minusMax; int32 leftSum = 0, rightSum = 0; - for (uint8 i = 0; i < BIN_COUNT - 1; i++) + for (int i = 0; i < BIN_COUNT - 1; i++) { - leftSum += bins[i].primitiveCount; + leftSum += bins[i].max.ints.w; leftCount[i] = leftSum; leftMin = min(leftMin, bins[i].min); leftMax = max(leftMax, bins[i].max); leftArea[i] = Aabb(leftMin, leftMax).calcArea(); - rightSum += bins[(BIN_COUNT - 1) - i].primitiveCount; + rightSum += bins[(BIN_COUNT - 1) - i].max.ints.w; rightCount[(BIN_COUNT - 2) - i] = rightSum; rightMin = min(rightMin, bins[(BIN_COUNT - 1) - i].min); rightMax = max(rightMax, bins[(BIN_COUNT - 1) - i].max); @@ -209,7 +208,7 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons } scale = (boundsMax - boundsMin) / BIN_COUNT; - for (uint8 i = 0; i < BIN_COUNT - 1; i++) + for (int i = 0; i < BIN_COUNT - 1; i++) { float planeCost = leftCount[i] * leftArea[i] + rightCount[i] * rightArea[i]; @@ -229,17 +228,17 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons //********************************************************************************************************************** Bvh::Bvh(const uint8* vertices, const uint8* indices, const Aabb& aabb, - uint32 indexCount, uint32 vertexSize, uint32 indexSize, const float3* _centroids) + uint32 indexCount, uint32 vertexSize, uint32 indexSize, const simd_f32_4* _centroids) { recreate(vertices, indices, aabb, indexCount, vertexSize, indexSize, _centroids); } -Bvh::Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* _centroids) +Bvh::Bvh(const Aabb* aabbs, const Aabb& 
aabb, uint32 aabbCount, const simd_f32_4* _centroids) { recreate(aabbs, aabb, aabbCount, _centroids); } void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb, - uint32 indexCount, uint32 vertexSize, uint32 indexSize, const float3* _centroids) + uint32 indexCount, uint32 vertexSize, uint32 indexSize, const simd_f32_4* _centroids) { assert(vertices); assert(indices); @@ -262,7 +261,7 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb getIndex = getIndex16; else if (indexSize != sizeof(uint32)) abort(); - const float3* centroidData; + const simd_f32_4* centroidData; if (!_centroids) { centroids.resize(primitiveCount); @@ -274,10 +273,10 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb auto i0 = getIndex(indices + (offset ) * indexSize); auto i1 = getIndex(indices + (offset + 1) * indexSize); auto i2 = getIndex(indices + (offset + 2) * indexSize); - auto v0 = (const float3*)(vertices + i0 * vertexSize); - auto v1 = (const float3*)(vertices + i1 * vertexSize); - auto v2 = (const float3*)(vertices + i2 * vertexSize); - centroids[i] = (*v0 + *v1 + *v2) * (1.0f / 3.0f); + auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize)); + auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize)); + auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize)); + centroids[i] = (v0 + v1 + v2) * (1.0f / 3.0f); } } else @@ -287,21 +286,21 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb auto nodeCount = (uint32)1; auto node = &nodes[0]; - node->setAabb(aabb); - node->leaf.primitiveCount = primitiveCount; - node->leaf.firstPrimitive = 0; + node->aabb = aabb; + node->setPrimitiveCount(primitiveCount); + node->setFirstPrimitive(0); while (true) { - auto firstPrimitive = node->leaf.firstPrimitive; - auto primitiveCount = node->leaf.primitiveCount; + auto firstPrimitive = node->getFirstPrimitive(); + auto primitiveCount = 
node->getPrimitiveCount(); auto lastPrimitive = firstPrimitive + primitiveCount; int32 axis; float split, cost; findBestSplitPlane(firstPrimitive, lastPrimitive, vertexSize, indexSize, vertices, indices, primitives.data(), centroidData, getIndex, axis, split, cost); - auto nodeCost = node->getAabb().calcArea() * primitiveCount; + auto nodeCost = node->aabb.calcArea() * primitiveCount; if (cost >= nodeCost) { if (nodeStack.empty()) @@ -340,20 +339,20 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb auto leftNodeID = nodeCount; auto rightNodeID = nodeCount + 1; nodeCount += 2; - node->child.leftNode = leftNodeID; - node->child.primitiveCount = 0; + node->setLeftNode(leftNodeID); + node->setPrimitiveCount(0); auto node1 = &nodes[leftNodeID]; - node1->leaf.firstPrimitive = firstPrimitive; - node1->leaf.primitiveCount = count1; - node1->setAabb(calculateNodeAabb(node1, vertices, - indices, primitives.data(), vertexSize, indexSize, getIndex)); + node1->setFirstPrimitive(firstPrimitive); + node1->setPrimitiveCount(count1); + node1->aabb = calculateNodeAabb(node1, vertices, indices, + primitives.data(), vertexSize, indexSize, getIndex); auto node2 = &nodes[rightNodeID]; - node2->leaf.firstPrimitive = i; - node2->leaf.primitiveCount = count2; - node2->setAabb(calculateNodeAabb(node2, vertices, - indices, primitives.data(), vertexSize, indexSize, getIndex)); + node2->setFirstPrimitive(i); + node2->setPrimitiveCount(count2); + node2->aabb = calculateNodeAabb(node2, vertices, indices, + primitives.data(), vertexSize, indexSize, getIndex); if (count2 > count1) { @@ -383,7 +382,7 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb } //********************************************************************************************************************** -void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* _centroids) +void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 
aabbCount, const simd_f32_4* _centroids) { assert(aabbs); assert(aabbCount > 0); @@ -394,7 +393,7 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const for (uint32 i = 0; i < aabbCount; i++) primitiveData[i] = i; - const float3* centroidData; + const simd_f32_4* centroidData; if (!_centroids) { centroids.resize(aabbCount); @@ -414,21 +413,21 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const auto nodeData = nodes.data(); auto nodeCount = (uint32)1; auto node = &nodeData[0]; - node->setAabb(aabb); - node->leaf.primitiveCount = aabbCount; - node->leaf.firstPrimitive = 0; + node->aabb = aabb; + node->setPrimitiveCount(aabbCount); + node->setFirstPrimitive(0); while (true) { - auto firstPrimitive = node->leaf.firstPrimitive; - auto primitiveCount = node->leaf.primitiveCount; + auto firstPrimitive = node->getFirstPrimitive(); + auto primitiveCount = node->getPrimitiveCount(); auto lastPrimitive = firstPrimitive + primitiveCount; int32 axis; float split, cost; findBestSplitPlane(firstPrimitive, lastPrimitive, aabbs, primitiveData, centroidData, axis, split, cost); - auto nodeCost = node->getAabb().calcArea() * primitiveCount; + auto nodeCost = node->aabb.calcArea() * primitiveCount; if (cost >= nodeCost) { if (nodeStack.empty()) @@ -467,18 +466,18 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const auto leftNodeID = nodeCount; auto rightNodeID = nodeCount + 1; nodeCount += 2; - node->child.leftNode = leftNodeID; - node->child.primitiveCount = 0; + node->setLeftNode(leftNodeID); + node->setPrimitiveCount(0); auto node1 = &nodeData[leftNodeID]; - node1->leaf.firstPrimitive = firstPrimitive; - node1->leaf.primitiveCount = count1; - node1->setAabb(calculateNodeAabb(node1, aabbs, primitiveData)); + node1->setFirstPrimitive(firstPrimitive); + node1->setPrimitiveCount(count1); + node1->aabb = calculateNodeAabb(node1, aabbs, primitiveData); auto node2 = &nodeData[rightNodeID]; - 
node2->leaf.firstPrimitive = i; - node2->leaf.primitiveCount = count2; - node2->setAabb(calculateNodeAabb(node2, aabbs, primitiveData)); + node2->setFirstPrimitive(i); + node2->setPrimitiveCount(count2); + node2->aabb = calculateNodeAabb(node2, aabbs, primitiveData); if (count2 > count1) { diff --git a/tests/test-containers.cpp b/tests/test-containers.cpp new file mode 100644 index 0000000..115e7a3 --- /dev/null +++ b/tests/test-containers.cpp @@ -0,0 +1,40 @@ +// Copyright 2022-2025 Nikita Fediuchin. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "math/angles.hpp" +#include "math/sphere.hpp" +#include "math/matrix/projection.hpp" +#include + +using namespace math; + +static void testPlanes() +{ + auto plane = Plane(simd_f32_4(1.0f, 2.0f, 3.0f), 12.3f); + if (!isNormalized3(plane.getNormal())) + throw runtime_error("Plane is not normalized."); + + Plane frustumPlanes[Plane::frustumCount]; + auto viewProj = calcPerspProjInfRevZ(radians(90.0f), 16.0f / 9.0f, 0.01f); + extractFrustumPlanes(simd_f32_4x4(viewProj), frustumPlanes); + normalizePlanes(frustumPlanes, Plane::frustumCount); +} + +// TODO: test other containers. 
+ +int main() +{ + testPlanes(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/tests/test-matrix.cpp b/tests/test-matrix.cpp index 8008ecf..ccfa7ea 100644 --- a/tests/test-matrix.cpp +++ b/tests/test-matrix.cpp @@ -12,26 +12,72 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "math/matrix.hpp" +#include "math/matrix/transform.hpp" +#include "math/angles.hpp" #include using namespace math; -static void cmp(simd_f32_4x4 a, float4x4 b) +static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f) +{ + for (int i = 0; i < 4; i++) + { + if (isnan(a[i])) + a[i] = 0.0f; + if (isnan(b[i])) + b[i] = 0.0f; + } + + auto difference = distanceSq((float4)a, b); + if (difference > tolerance) + throw runtime_error("Float4 vectors test failed."); +} +static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f) +{ + for (int i = 0; i < 3; i++) + { + if (isnan(a[i])) + a[i] = 0.0f; + if (isnan(b[i])) + b[i] = 0.0f; + } + + auto difference = distanceSq((float3)a, b); + if (difference > tolerance) + throw runtime_error("Float3 vectors test failed."); +} +static void cmp(simd_f32_4x4 a, float4x4 b, float tolerance = 1.0e-9f) { for (int c = 0; c < 4; c++) { for (int r = 0; r < 4; r++) { - if (isnan(a.columns[c][r])) - a.columns[c][r] = 0.0f; + if (isnan(a[c][r])) + a[c][r] = 0.0f; + if (isnan(b[c][r])) + b[c][r] = 0.0f; + } + + auto difference = distanceSq((float4)a[c], b[c]); + if (difference > tolerance) + throw runtime_error("Float4 matrices test failed."); + } +} +static void cmp(simd_f32_4x4 a, float3x3 b, float tolerance = 1.0e-9f) +{ + for (int c = 0; c < 3; c++) + { + for (int r = 0; r < 3; r++) + { + if (isnan(a[c][r])) + a[c][r] = 0.0f; if (isnan(b[c][r])) b[c][r] = 0.0f; } - auto difference = distanceSq((float4)a.columns[c], b[c]); - if (difference > 1.0e-10f) - throw runtime_error("Float matrices test failed."); + auto difference = distanceSq((float3)a[c], b[c]); + if 
(difference > tolerance) + throw runtime_error("Float4 matrices test failed."); } } static void cmp(int a, int b) @@ -40,23 +86,25 @@ static void cmp(int a, int b) throw runtime_error("Int test failed."); } -int main() -{ - const auto matLeft = float4x4( - 0.0f, 2.3f, 4.56f, 78.9f, - 1.0f, 0.023f, 4.5f, 6789.0f, - 0.01f, 0.0f, 23.456f, 7.089f, - 1.234f, 5.0006f, 78.09f, 0.0f); - const auto matRight = float4x4( - 1.0f, -23.0f, 4.56f, -0.0789, - 10.023f, -4.0f, -56.708f, 0.009f, - -100.0f, 23.0405f, 0.0678f, -0.9f, - 1.234f, 6789.0f, 500.0f, 1.0f); - auto simdLeft = simd_f32_4x4(matLeft), simdRight = simd_f32_4x4(matRight); +static constexpr auto matLeft = float4x4( + 0.0f, 2.3f, 4.56f, 78.9f, + 1.0f, 0.023f, 4.5f, 6789.0f, + 0.01f, 0.0f, 23.456f, 7.089f, + 1.234f, 5.0006f, 78.09f, 0.0f); +static constexpr auto matRight = float4x4( + 1.0f, -23.0f, 4.56f, -0.0789, + 10.023f, -4.0f, -56.708f, 0.009f, + -100.0f, 23.0405f, 0.0678f, -0.9f, + 1.234f, 6789.0f, 500.0f, 1.0f); +static const auto simdLeft = simd_f32_4x4(matLeft), simdRight = simd_f32_4x4(matRight); +static void testFloatMatrices() +{ cmp(simdLeft, matLeft); cmp(simdRight, matRight); cmp(simdLeft * simdRight, matLeft * matRight); + cmp(simdLeft * simdRight.c0, matLeft * matRight.c0); + cmp(simdRight * simdRight.c0, matRight * matRight.c0); cmp(simdRight + 123.4f, matRight + 123.4f); cmp(simdRight - 123.4f, matRight - 123.4f); cmp(simdRight * 123.4f, matRight * 123.4f); @@ -65,6 +113,55 @@ int main() cmp(simdRight == simdRight, true); cmp(simdLeft != simdRight, true); cmp(simdRight == simdLeft, false); - + cmp(simdRight == simdLeft, false); + cmp(multiply3x3(simdLeft, simdRight.c0), (float3x3)matLeft * (float3)matRight.c0); + cmp(multiply3x3(simdRight, simdRight.c0), (float3x3)matRight * (float3)matRight.c0); + cmp(transpose4x4(simdLeft), transpose(matLeft)); + cmp(transpose3x3(simdRight), transpose((float3x3)matRight)); + cmp(inverse4x4(simdLeft), inverse(matLeft)); +} + +static void testMatrixTransforms() +{ + 
{ + const auto vecTrans = float3(0.0f, 1.23f, -45.067f); + const auto simdTrans = simd_f32_4(vecTrans); + auto mat = translate(vecTrans); + auto simd = translate(simdTrans); + cmp(simd, mat); + cmp(getTranslation(simd), getTranslation(mat)); + cmp(translate(simd, simdTrans), translate(mat, vecTrans)); + cmp(translate(simdTrans, simd), translate(vecTrans, mat)); + cmp(inverseTransRot(simd), inverseTransRot(mat)); + } + { + const auto vecScale = float3(1.0f, 0.5f, 0.01f); + const auto simdScale = simd_f32_4(vecScale); + auto mat = scale(vecScale); + auto simd = scale(simdScale); + cmp(extractScale(simd), extractScale(mat)); + cmp(scale(simd, simdScale), scale(mat, vecScale)); + cmp(scale(simdScale, simd), scale(vecScale, mat)); + } + { + const auto rot = radians(float3(-1.234f, 30.0f, -90.0f)); + const auto vecRot = quat(rot); + const auto simdRot = fromEulerAngles(simd_f32_4(rot)); + auto mat = rotate(vecRot); + auto simd = rotate(simdRot); + cmp(extractRotation(simd), extractRotation(mat)); + cmp(extractRotationOnly(simd), extractRotationOnly(mat)); + cmp(extractQuat(simd), extractQuat(mat)); + cmp(simd * rotate(simdRot), mat * rotate(vecRot)); + cmp(rotate(simdRot) * simd, rotate(vecRot) * mat); + cmp(inverseTransRot(simd), inverseTransRot(mat)); + + // TODO: lookAt + } +} + +int main() +{ + testFloatMatrices(); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/tests/test-quaternion.cpp b/tests/test-quaternion.cpp new file mode 100644 index 0000000..bb32c62 --- /dev/null +++ b/tests/test-quaternion.cpp @@ -0,0 +1,88 @@ +// Copyright 2022-2025 Nikita Fediuchin. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "math/quaternion.hpp" +#include "math/angles.hpp" + +#include +#include + +using namespace math; + +static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f) +{ + for (int i = 0; i < 4; i++) + { + if (isnan(a[i])) + a[i] = 0.0f; + if (isnan(b[i])) + b[i] = 0.0f; + } + + auto difference = distanceSq((float4)a, b); + if (difference > tolerance) + throw runtime_error("Float4 vectors test failed."); +} +static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f) +{ + for (int i = 0; i < 3; i++) + { + if (isnan(a[i])) + a[i] = 0.0f; + if (isnan(b[i])) + b[i] = 0.0f; + } + + auto difference = distanceSq((float3)a, b); + if (difference > tolerance) + throw runtime_error("Float3 vectors test failed."); +} +static void cmp(float a, float b, float tolerance = 1.0e-9f) +{ + auto difference = (a - b) * (a - b); + if (difference > tolerance) + throw runtime_error("Float test failed."); +} + +static void testQuatVectors() +{ + const auto vecAngles = radians(float4(90.0f, -1.23f, 45.0f, 0.0f)); + const auto simdAngles = simd_f32_4(vecAngles); + const auto vecQuat = quat((float3)vecAngles); + const auto simdQuat = fromEulerAngles(simdAngles); + cmp(simdQuat, vecQuat); + cmp(simd_f32_quat(vecAngles.x, simd_f32_4(1.0f, 0.0f, 0.0f, 0.0f)), quat(vecAngles.x, float3::right)); + cmp(simd_f32_quat(vecAngles.y, simd_f32_4(0.0f, -1.0f, 0.0f, 0.0f)), quat(vecAngles.y, float3::bottom)); + cmp(simdQuat.extractEulerAngles(), vecQuat.extractEulerAngles()); + cmp(simdQuat.extractPitch(), vecQuat.extractPitch()); + cmp(simdQuat.extractYaw(), 
vecQuat.extractYaw()); + cmp(simdQuat.extractRoll(), vecQuat.extractRoll()); + cmp(conjugate(simdQuat), conjugate(vecQuat)); + cmp(inverse(simdQuat), inverse(vecQuat)); + cmp(slerp(simdQuat, simdQuat * simdQuat, 0.75f), + slerp(vecQuat, vecQuat * vecQuat, 0.75f)); + cmp(simdQuat * simdQuat, vecQuat * vecQuat); + { + const auto vev = float3(1.0f, 2.34f, -0.5678f); + const auto simd = simd_f32_4(vev); + cmp(simdQuat * simd, vecQuat * vev); + cmp(simd * simdQuat, vev * vecQuat); + } +} + +int main() +{ + testQuatVectors(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/tests/test-vector.cpp b/tests/test-vector.cpp index 092d548..d0fd715 100644 --- a/tests/test-vector.cpp +++ b/tests/test-vector.cpp @@ -18,34 +18,48 @@ using namespace math; -static void cmp(simd_f32_4 a, float4 b) +static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f) { for (int i = 0; i < 4; i++) { - if (isnan(a.vec[i])) - a.vec[i] = 0.0f; + if (isnan(a[i])) + a[i] = 0.0f; if (isnan(b[i])) b[i] = 0.0f; } auto difference = distanceSq((float4)a, b); - if (difference > 1.0e-10f) - throw runtime_error("Float vectors test failed."); + if (difference > tolerance) + throw runtime_error("Float4 vectors test failed."); +} +static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f) +{ + for (int i = 0; i < 3; i++) + { + if (isnan(a[i])) + a[i] = 0.0f; + if (isnan(b[i])) + b[i] = 0.0f; + } + + auto difference = distanceSq((float3)a, b); + if (difference > tolerance) + throw runtime_error("Float3 vectors test failed."); } static void cmp(simd_i32_4 a, int4 b) { if ((int4)a != b) - throw runtime_error("Float vectors test failed."); + throw runtime_error("Int4 vectors test failed."); } static void cmp(simd_u32_4 a, uint4 b) { if ((uint4)a != b) - throw runtime_error("Uint vectors test failed."); + throw runtime_error("Uint4 vectors test failed."); } -static void cmp(float a, float b) +static void cmp(float a, float b, float tolerance = 1.0e-9f) { auto difference = (a - b) * 
(a - b); - if (difference > 1.0e-12f) + if (difference > tolerance) throw runtime_error("Float test failed."); } static void cmp(int a, int b) @@ -74,12 +88,20 @@ static void testFloatVectors() cmp(simdLeft > simdRight, vecLeft > vecRight); cmp(simdLeft <= simdRight, vecLeft <= vecRight); cmp(simdLeft >= simdRight, vecLeft >= vecRight); - cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft)); - cmp(compare(simdRight, simdRight), compare(vecRight, vecRight)); - cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight)); + cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft)); + cmp(equal(simdRight, simdRight), equal(vecRight, vecRight)); + cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight)); + cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft)); + cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight)); + cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight)); cmp(select(simd_u32_4(1, 0, 0, 1), simdLeft, simdRight), select(uint4(1, 0, 0, 1), vecLeft, vecRight)); + cmp(select(simd_u32_4(0, 1, 0, 1), simdRight, simdLeft), select(uint4(0, 1, 0, 1), vecRight, vecLeft)); cmp(min(simdLeft, simdRight), min(vecLeft, vecRight)); cmp(max(simdLeft, simdRight), max(vecLeft, vecRight)); + cmp(min3(simdLeft), min(vecLeft.x, vecLeft.y, vecLeft.z)); + cmp(min3(simdRight), min(vecRight.x, vecRight.y, vecRight.z)); + cmp(max3(simdLeft), max(vecLeft.x, vecLeft.y, vecLeft.z)); + cmp(max3(simdRight), max(vecRight.x, vecRight.y, vecRight.z)); cmp(fma(simdLeft, simdRight, simdRight), fma(vecLeft, vecRight, vecRight)); cmp(abs(simdLeft), abs(vecLeft)); cmp(abs(simdRight), abs(vecRight)); @@ -101,21 +123,41 @@ static void testFloatVectors() cmp(dot4(simdLeft, simdRight), dot(vecLeft, vecRight)); cmp(dotV3(simdLeft, simdRight), float4(dot((float3)vecLeft, (float3)vecRight))); cmp(dot3(simdLeft, simdRight), dot((float3)vecLeft, (float3)vecRight)); - { - auto c = cross((float3)vecLeft, (float3)vecRight); - cmp(cross3(simdLeft, simdRight), float4(c, 
c.z)); - } + cmp(cross3(simdLeft, simdRight), cross((float3)vecLeft, (float3)vecRight)); cmp(length4(simdLeft), length(vecLeft)); cmp(length4(simdRight), length(vecRight)); cmp(length3(simdLeft), length((float3)vecLeft)); cmp(length3(simdRight), length((float3)vecRight)); cmp(normalize4(simdLeft), normalize(vecLeft)); cmp(normalize4(simdRight), normalize(vecRight)); + cmp(normalize3(simdLeft), normalize((float3)vecLeft)); + cmp(normalize3(simdRight), normalize((float3)vecRight)); + cmp(isNormalized4(normalize4(simdLeft)), true); + cmp(isNormalized3(normalize3(simdRight)), true); + cmp(isNormalized(normalize(vecLeft)), true); + cmp(isNormalized(normalize((float3)vecRight)), true); + cmp(log(simdLeft), log(vecLeft)); + cmp(log(simdRight), log(vecRight)); + cmp(exp(simdLeft), exp(vecLeft)); + cmp(exp(simdRight), exp(vecRight)); + { + const auto vecPow = float4(2.0f, 1.0f, 0.5f, 0.0f); + const auto simdPow = simd_f32_4(vecPow); + cmp(mod(simdLeft, simdPow), mod(vecLeft, vecPow)); + cmp(mod(simdRight, simdPow), mod(vecRight, vecPow)); + cmp(pow(simdLeft, simdPow), pow(vecLeft, vecPow)); + cmp(pow(simdRight, simdPow), pow(vecRight, vecPow)); + cmp(fastPow(simdLeft, simdPow), pow(vecLeft, vecPow), 1.0e-6f); + cmp(fastPow(simdRight, simdPow), pow(vecRight, vecPow), 1.0e-6f); + } { - auto c = normalize((float3)vecLeft); - cmp(normalize3(simdLeft), float4(c, c.z)); - c = normalize((float3)vecRight); - cmp(normalize3(simdRight), float4(c, c.z)); + simd_f32_4 simdSin, simdCos; + sinCos(simdLeft, simdSin, simdCos); + cmp(simdSin, sin(vecLeft)); + cmp(simdCos, cos(vecLeft)); + sinCos(simdRight, simdSin, simdCos); + cmp(simdSin, sin(vecRight)); + cmp(simdCos, cos(vecRight)); } } static void testIntVectors() @@ -150,10 +192,14 @@ static void testIntVectors() cmp(simdLeft > simdRight, vecLeft > vecRight); cmp(simdLeft <= simdRight, vecLeft <= vecRight); cmp(simdLeft >= simdRight, vecLeft >= vecRight); - cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft)); - 
cmp(compare(simdRight, simdRight), compare(vecRight, vecRight)); - cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight)); + cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft)); + cmp(equal(simdRight, simdRight), equal(vecRight, vecRight)); + cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight)); + cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft)); + cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight)); + cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight)); cmp(select(simd_u32_4(1, 0, 0, 1), simdLeft, simdRight), select(uint4(1, 0, 0, 1), vecLeft, vecRight)); + cmp(select(simd_u32_4(0, 1, 0, 1), simdRight, simdLeft), select(uint4(0, 1, 0, 1), vecRight, vecLeft)); cmp(min(simdLeft, simdRight), min(vecLeft, vecRight)); cmp(max(simdLeft, simdRight), max(vecLeft, vecRight)); } @@ -190,10 +236,14 @@ static void testUintVectors() cmp(areAllTrue(simd_u32_4(UINT32_MAX)), areAllTrue(uint4(UINT32_MAX))); cmp(areAllFalse(simd_u32_4(0u)), areAllTrue(uint4(0u))); cmp(areAnyTrue(simd_u32_4(0, 1, 0, 0)), areAllTrue(uint4(0, 1, 0, 0))); - cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft)); - cmp(compare(simdRight, simdRight), compare(vecRight, vecRight)); - cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight)); + cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft)); + cmp(equal(simdRight, simdRight), equal(vecRight, vecRight)); + cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight)); + cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft)); + cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight)); + cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight)); cmp(select(simd_u32_4(1, 0, 1, 0), simdLeft, simdRight), select(uint4(1, 0, 1, 0), vecLeft, vecRight)); + cmp(select(simd_u32_4(0, 1, 1, 0), simdRight, simdLeft), select(uint4(0, 1, 1, 0), vecRight, vecLeft)); cmp(min(simdLeft, simdRight), min(vecLeft, vecRight)); cmp(max(simdLeft, simdRight), max(vecLeft, vecRight)); }