diff --git a/.gitignore b/.gitignore
index fc06e7c..edc742f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,12 @@
 # Created by https://www.toptal.com/developers/gitignore/api/c,c++,cmake,macos,linux,windows
 # Edit at https://www.toptal.com/developers/gitignore?templates=c,c++,cmake,macos,linux,windows
 
+### Common ###
+.vs/
+.vscode/
+out/
+build/
+
 ### C ###
 # Prerequisites
 *.d
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cc8b24..9ad0c58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,4 +60,12 @@ if(MATH_BUILD_TESTS)
 	add_executable(TestMathMatrix tests/test-matrix.cpp)
 	target_link_libraries(TestMathMatrix PUBLIC math-static)
 	add_test(NAME TestMathMatrix COMMAND TestMathMatrix)
+
+	add_executable(TestMathQuaternion tests/test-quaternion.cpp)
+	target_link_libraries(TestMathQuaternion PUBLIC math-static)
+	add_test(NAME TestMathQuaternion COMMAND TestMathQuaternion)
+
+	add_executable(TestMathContainers tests/test-containers.cpp)
+	target_link_libraries(TestMathContainers PUBLIC math-static)
+	add_test(NAME TestMathContainers COMMAND TestMathContainers)
 endif()
\ No newline at end of file
diff --git a/include/math/aabb.hpp b/include/math/aabb.hpp
index 543acd8..af93766 100644
--- a/include/math/aabb.hpp
+++ b/include/math/aabb.hpp
@@ -22,6 +22,8 @@
 #include "math/ray.hpp"
 #include "math/plane.hpp"
 
+// TODO: Port more Jolt optimized BBox functions.
+
 namespace math
 {
 
@@ -38,179 +40,194 @@ namespace math
 struct [[nodiscard]] Aabb
 {
 protected:
-	float3 min, max;
+	simd_f32_4 min, max;
 public:
 	/**
-	 * @brief Creates a new Axis Aligned Bounding Box. (AABB)
+	 * @brief Creates a new Axis Aligned Bounding Box in 3D space. (AABB)
 	 * 
-	 * @param[in] min minimum bounding box corner position in the space
-	 * @param[in] max maximum bounding box corner position in the space
+	 * @param min minimum bounding box corner position in 3D space
+	 * @param max maximum bounding box corner position in 3D space
 	 */
-	constexpr Aabb(const float3& min = float3::zero, const float3& max = float3::zero) noexcept : min(min), max(max)
+	Aabb(simd_f32_4 min = simd_f32_4::zero, simd_f32_4 max = simd_f32_4::zero) noexcept : min(min), max(max)
 	{
 		assert(areAllTrue(min <= max));
 	}
 
 	/**
-	 * @brief Returns minimum AABB corner position in the space.
+	 * @brief Returns minimum AABB corner position in 3D space.
 	 * @details The point that has the smallest values for all axes.
 	 */
-	constexpr const float3& getMin() const noexcept { return min; }
+	const simd_f32_4& getMin() const noexcept { return min; }
 	/**
-	 * @brief Returns maximum AABB corner position in the space.
+	 * @brief Returns maximum AABB corner position in 3D space.
 	 * @details The point that has the biggest values for all axes.
 	 */
-	constexpr const float3& getMax() const noexcept { return max; }
+	const simd_f32_4& getMax() const noexcept { return max; }
 
 	/**
-	 * @brief Sets minimum AABB corner position in the space.
+	 * @brief Sets minimum AABB corner position in 3D space.
 	 * @details See the @ref getMin().
-	 * @param[in] min minimum bounding box corner position in the space
+	 * @param min minimum bounding box corner position in 3D space
 	 */
-	void setMin(const float3& min) noexcept
+	void setMin(simd_f32_4 min) noexcept
 	{
 		assert(areAllTrue(min <= this->max));
 		this->min = min;
 	}
 	/**
-	 * @brief Sets maximum AABB corner position in the space.
+	 * @brief Sets maximum AABB corner position in 3D space.
 	 * @details See the @ref setMax().
-	 * @param[in] max maximum bounding box corner position in the space
+	 * @param max maximum bounding box corner position in 3D space
 	 */
-	void setMax(const float3& max) noexcept
+	void setMax(simd_f32_4 max) noexcept
 	{
 		assert(areAllTrue(max >= this->min));
 		this->max = max;
 	}
 	/**
-	 * @brief Sets minimum and maximum AABB corner position in the space.
+	 * @brief Sets minimum and maximum AABB corner position in 3D space.
 	 * @details See the @ref getMin() and @ref getMax().
 	 * 
-	 * @param[in] min minimum bounding box corner position in the space
-	 * @param[in] max maximum bounding box corner position in the space
+	 * @param min minimum bounding box corner position in 3D space
+	 * @param max maximum bounding box corner position in 3D space
 	 */
-	void set(const float3& min, const float3& max) noexcept
+	void set(simd_f32_4 min, simd_f32_4 max) noexcept
 	{
 		assert(areAllTrue(min <= max));
-		this->min = min;
-		this->max = max;
+		this->min = min; this->max = max;
 	}
 	
 	/**
-	 * @brief Sets size and position of the bounding box.
+	 * @brief Sets size of the bounding box.
+	 * @param size target size of the bounding box
+	 */
+	void setSize(simd_f32_4 size) noexcept
+	{
+		assert(areAllTrue(size >= 0.0f));
+		auto extent = size * 0.5f;
+		min = -extent; max = extent;
+	}
+	/**
+	 * @brief Sets size and position of the bounding box in 3D space.
 	 * 
-	 * @param[in] size target size of the bounding box
-	 * @param[in] position target position of the bounding box in space
+	 * @param size target size of the bounding box
+	 * @param position target position of the bounding box in 3D space
 	 */
-	void setSize(const float3& size, const float3& position = float3::zero) noexcept
+	void setSize(simd_f32_4 size, simd_f32_4 position) noexcept
 	{
 		assert(areAllTrue(size >= 0.0f));
 		auto extent = size * 0.5f;
-		min = position - extent;
-		max = position + extent;
+		min = position - extent; max = position + extent;
 	}
 
 	/**
 	 * @brief Returns size of the bounding box.
 	 */
-	constexpr float3 getSize() const noexcept { return max - min; }
+	simd_f32_4 getSize() const noexcept { return max - min; }
 	/**
-	 * @brief Returns position of the bounding box.
+	 * @brief Returns position of the bounding box in 3D space.
 	 */
-	constexpr float3 getPosition() const noexcept { return (min + max) * 0.5f; }
+	simd_f32_4 getPosition() const noexcept { return (min + max) * 0.5f; }
 
 	/*******************************************************************************************************************
-	 * @brief Sets extent and position of the bounding box.
-	 * 
-	 * @param[in] extent target extent of the bounding box (half size)
-	 * @param[in] position target position of the bounding box in space
+	 * @brief Sets extent of the bounding box, centering it at the origin.
+	 * @param extent target extent of the bounding box (half size)
 	 */
-	void setExtent(const float3& extent, const float3& position = float3::zero) noexcept
+	void setExtent(simd_f32_4 extent) noexcept
 	{
 		assert(areAllTrue(extent >= 0.0f));
-		min = position - extent;
-		max = position + extent;
+		min = -extent; max = extent;
 	}
-
 	/**
-	 * @brief Returns extent and position of the bounding box.
+	 * @brief Sets extent and position of the bounding box in 3D space.
 	 * 
-	 * @param[out] extent target extent of the bounding box (half size)
-	 * @param[out] position target position of the bounding box in space
+	 * @param extent target extent of the bounding box (half size)
+	 * @param position target position of the bounding box in 3D space
 	 */
-	void getExtent(float3& extent, float3& position) const noexcept
+	void setExtent(simd_f32_4 extent, simd_f32_4 position) noexcept
 	{
-		extent = (max - min) * 0.5f;
-		position = min + extent;
+		assert(areAllTrue(extent >= 0.0f));
+		min = position - extent; max = position + extent;
 	}
+
 	/**
 	 * @brief Returns extent of the bounding box. (half size)
 	 */
-	constexpr float3 getExtent() const noexcept { return (max - min) * 0.5f; }
+	simd_f32_4 getExtent() const noexcept { return (max - min) * 0.5f; }
+	/**
+	 * @brief Returns extent and position of the bounding box in 3D space.
+	 * 
+	 * @param extent target extent of the bounding box (half size)
+	 * @param position target position of the bounding box in 3D space
+	 */
+	void getExtent(simd_f32_4& extent, simd_f32_4& position) const noexcept
+	{
+		extent = (max - min) * 0.5f; position = min + extent;
+	}
 
 	/**
-	 * @brief Extends bounding box min and max corner positions.
-	 * @param[in] point target point to include into the bounding box
+	 * @brief Extends bounding box min and max corner positions in 3D space.
+	 * @param point target point to include into the bounding box in 3D space
 	 */
-	void extend(const float3& point) noexcept
+	void extend(simd_f32_4 point) noexcept
 	{
-		min = math::min(min, point);
-		max = math::max(max, point); 
+		min = math::min(min, point); max = math::max(max, point); 
 	}
 	/**
-	 * @brief Extends bounding box min and max corner positions.
+	 * @brief Extends bounding box min and max corner positions in 3D space.
 	 * @param[in] aabb target AABB to include into the bounding box
 	 */
 	void extend(const Aabb& aabb) noexcept
 	{
-		min = math::min(min, aabb.min);
-		max = math::max(max, aabb.max); 
+		min = math::min(min, aabb.min); max = math::max(max, aabb.max); 
 	}
 
 	/**
-	 * @brief Calculates area of the AABB.
+	 * @brief Translates bounding box in 3D space.
+	 * @param translation target translation transformation in 3D space
 	 */
-	constexpr float calcArea() const noexcept
-	{
-		auto extent = max - min;
-		return extent.x * extent.y + extent.y * extent.z + extent.z * extent.x;
-	}
+	void translate(simd_f32_4 translation) noexcept { min += translation; max += translation; }
+	/**
+	 * @brief Scales bounding box in 3D space.
+	 * @param scale target scale transformation in 3D space
+	 */
+	void scale(simd_f32_4 scale) noexcept { min *= scale; max *= scale; }
 
-	constexpr Aabb operator*(const float3& v) const noexcept { return Aabb(min * v, max * v); }
-	constexpr Aabb operator/(const float3& v) const noexcept { return Aabb(min / v, max / v); }
-	constexpr Aabb operator+(const float3& v) const noexcept { return Aabb(min + v, max + v); }
-	constexpr Aabb operator-(const float3& v) const noexcept { return Aabb(min - v, max - v); }
-	Aabb& operator*=(const float3& v) noexcept { min *= v; max *= v; return *this; }
-	Aabb& operator/=(const float3& v) noexcept { min /= v; max /= v; return *this; }
-	Aabb& operator+=(const float3& v) noexcept { min += v; max += v; return *this; }
-	Aabb& operator-=(const float3& v) noexcept { min -= v; max -= v; return *this; }
-	constexpr bool operator==(const Aabb& v) const noexcept { return min == v.min && max == v.max; }
-	constexpr bool operator!=(const Aabb& v) const noexcept { return min != v.min || max != v.max; }
-
-	constexpr bool operator<(const Aabb& v) const noexcept
-	{
-		return areAllTrue(min < v.min) && areAllTrue(max < v.max);
-	}
-	constexpr bool operator>(const Aabb& v) const noexcept
-	{ 
-		return areAllTrue(min > v.min) && areAllTrue(max > v.max);
-	}
-	constexpr bool operator<=(const Aabb& v) const noexcept
+	// TODO: update min max using rotation matrix
+
+	/**
+	 * @brief Calculates area of the bounding box.
+	 */
+	float calcArea() const noexcept
 	{
-		return areAllTrue(min <= v.min) && areAllTrue(max <= v.max);
+		auto extent = max - min;
+		return 2.0f * (extent.getX() * extent.getY() + extent.getX() * extent.getZ() + extent.getY() * extent.getZ());
 	}
-	constexpr bool operator>=(const Aabb& v) const noexcept
+	/**
+	 * @brief Calculates volume of the bounding box.
+	 */
+	float calcVolume() const noexcept
 	{
-		return areAllTrue(min >= v.min) && areAllTrue(max >= v.max);
+		auto extent = max - min;
+		return extent.getX() * extent.getY() * extent.getZ();
 	}
+
+	Aabb operator*(simd_f32_4 v) const noexcept { return Aabb(min * v, max * v); }
+	Aabb operator/(simd_f32_4 v) const noexcept { return Aabb(min / v, max / v); }
+	Aabb operator+(simd_f32_4 v) const noexcept { return Aabb(min + v, max + v); }
+	Aabb operator-(simd_f32_4 v) const noexcept { return Aabb(min - v, max - v); }
+	Aabb& operator*=(simd_f32_4 v) noexcept { min *= v; max *= v; return *this; }
+	Aabb& operator/=(simd_f32_4 v) noexcept { min /= v; max /= v; return *this; }
+	Aabb& operator+=(simd_f32_4 v) noexcept { min += v; max += v; return *this; }
+	Aabb& operator-=(simd_f32_4 v) noexcept { min -= v; max -= v; return *this; }
 	
 	static const Aabb zero, one, two, half;
 };
 
-inline const Aabb Aabb::zero = Aabb(float3::zero, float3::zero);
-inline const Aabb Aabb::one = Aabb(float3(-0.5f), float3(0.5f));
-inline const Aabb Aabb::two = Aabb(float3::minusOne, float3::one);
-inline const Aabb Aabb::half = Aabb(float3(-0.25f), float3(0.25f));
+inline const Aabb Aabb::zero = Aabb(simd_f32_4::zero, simd_f32_4::zero);
+inline const Aabb Aabb::one = Aabb(simd_f32_4(-0.5f), simd_f32_4(0.5f));
+inline const Aabb Aabb::two = Aabb(simd_f32_4::minusOne, simd_f32_4::one);
+inline const Aabb Aabb::half = Aabb(simd_f32_4(-0.25f), simd_f32_4(0.25f));
 
 /**
  * @brief Returns true if first AABB binary representation is less than the second.
@@ -221,59 +238,113 @@ inline const Aabb Aabb::half = Aabb(float3(-0.25f), float3(0.25f));
 static bool isBinaryLess(const Aabb& a, const Aabb& b) noexcept { return memcmp(&a, &b, sizeof(float3) * 2) < 0; }
 
 /***********************************************************************************************************************
- * @brief Returns true if point is inside the AABB.
+ * @brief Returns true if point is inside the AABB in 3D space.
  * 
  * @param[in] aabb target AABB to check
- * @param[in] point target point in the space
+ * @param point target point in the space
+ */
+static bool isInside(const Aabb& aabb, simd_f32_4 point) noexcept
+{
+	return areAllTrue(aabb.getMin() <= point & aabb.getMax() >= point);
+}
+/**
+ * @brief Returns true if other AABB is inside this AABB in 3D space.
+ * 
+ * @param[in] aabb this AABB
+ * @param[in] other another AABB to check
  */
-static constexpr bool isInside(const Aabb& aabb, const float3& point) noexcept
+static bool isInside(const Aabb& aabb, const Aabb& other) noexcept
 {
-	return areAllTrue(aabb.getMin() <= point) && areAllTrue(point <= aabb.getMax());
+	return areAllTrue(aabb.getMin() <= other.getMin() & aabb.getMax() >= other.getMax());
 }
+
 /**
- * @brief Returns closest point inside AABB to the specified one.
+ * @brief Returns closest point inside AABB to the specified one in 3D space.
  *
  * @param[in] aabb target AABB to use
- * @param[in] point target point in the space
+ * @param point target point in the space
  */
-static constexpr float3 closestPoint(const Aabb& aabb, const float3& point) noexcept
+static simd_f32_4 closestPoint(const Aabb& aabb, simd_f32_4 point) noexcept
 {
 	return clamp(point, aabb.getMin(), aabb.getMax());
 }
 
 /**
- * @brief Calculates where ray intersects the AABB.
- * @return Distance to the intersection points.
+ * @brief Calculates where ray intersects the AABB in 3D space. (Ray is inversed!)
+ * @return Distance to the intersection points, or FLT_MAX; -FLT_MAX if no hit.
  *
- * @param[in] aabb target AABB to raycast
- * @param[in] ray target ray in the space
+ * @param aabb target AABB to raycast
+ * @param ray target ray in the space (inversed!)
  */
-float2 raycast2(const Aabb& aabb, const Ray& ray) noexcept;
+static float2 raycast2I(Aabb aabb, Ray ray) noexcept
+{
+	auto t1 = (aabb.getMin() - ray.origin) * ray.getDirection(); // per-axis distance to the min corner planes (direction is expected to be already inverted, hence the multiply)
+	auto t2 = (aabb.getMax() - ray.origin) * ray.getDirection(); // per-axis distance to the max corner planes
+	auto tMin = select(ray.getParallel(), simd_f32_4::minusMax, min(t1, t2)); // entry distances; lanes flagged by getParallel() presumably take minusMax so they never limit the result
+	auto tMax = select(ray.getParallel(), simd_f32_4::max, max(t1, t2)); // exit distances; flagged lanes presumably take max
+	tMin = max(tMin, tMin.swizzle<SwY, SwZ, SwX, SwU>()); // two rotate-and-max steps leave the largest of X, Y, Z in each of those lanes
+	tMin = max(tMin, tMin.swizzle<SwZ, SwX, SwY, SwU>());
+	tMax = min(tMax, tMax.swizzle<SwY, SwZ, SwX, SwU>()); // same reduction with min: smallest exit distance
+	tMax = min(tMax, tMax.swizzle<SwZ, SwX, SwY, SwU>());
+
+	auto noParallelOverlap = (ray.origin < aabb.getMin()) | (ray.origin > aabb.getMax()); // per axis: origin lies outside the box range
+	auto noIntersection = (tMin > tMax) | (tMax < simd_f32_4::zero) | (ray.getParallel() & noParallelOverlap); // entry after exit, exit behind the origin, or flagged axis with origin outside its range
+	noIntersection |= noIntersection.splatY(); // fold the Y and Z lanes into X, the only lane read below
+	noIntersection |= noIntersection.splatZ();
+
+	return float2(
+		select(noIntersection, simd_f32_4::max, tMin).getX(), 
+		select(noIntersection, simd_f32_4::minusMax, tMax).getX());
+}
 /**
- * @brief Calculates where ray intersects the AABB. (Ray is inversed!)
- * @return Distance to the intersection points.
+ * @brief Calculates where ray intersects the AABB in 3D space.
+ * @return Distance to the intersection points, or FLT_MAX; -FLT_MAX if no hit.
  *
  * @param[in] aabb target AABB to raycast
- * @param[in] ray target ray in the space (inversed)
+ * @param ray target ray in the space
  */
-float2 raycast2I(const Aabb& aabb, const Ray& ray) noexcept;
+static float2 raycast2(const Aabb& aabb, Ray ray) noexcept
+{
+	ray.setDirection(simd_f32_4::one / ray.getDirection());
+	return raycast2I(aabb, ray);
+}
 
 /**
- * @brief Calculates where ray intersects the AABB.
- * @return Distance to the first intersection point.
+ * @brief Calculates where ray intersects the AABB in 3D space. (Ray is inversed!)
+ * @return Distance to the first intersection point, or FLT_MAX if no hit.
  *
  * @param[in] aabb target AABB to raycast
- * @param[in] ray target ray in the space
+ * @param[in] ray target ray in the space (inversed)
  */
-float raycast1(const Aabb& aabb, const Ray& ray) noexcept;
+static float raycast1I(const Aabb& aabb, const Ray& ray) noexcept
+{
+	auto t1 = (aabb.getMin() - ray.origin) * ray.getDirection(); // per-axis distance to the min corner planes (direction is expected to be already inverted)
+	auto t2 = (aabb.getMax() - ray.origin) * ray.getDirection(); // per-axis distance to the max corner planes
+	auto tMin = select(ray.getParallel(), simd_f32_4::minusMax, min(t1, t2)); // entry distances; lanes flagged by getParallel() presumably take minusMax
+	auto tMax = select(ray.getParallel(), simd_f32_4::max, max(t1, t2)); // exit distances; flagged lanes presumably take max
+	tMin = max(tMin, tMin.swizzle<SwY, SwZ, SwX, SwU>()); // reduce to the largest entry distance across X, Y, Z
+	tMin = max(tMin, tMin.swizzle<SwZ, SwX, SwY, SwU>());
+	tMax = min(tMax, tMax.swizzle<SwY, SwZ, SwX, SwU>()); // reduce to the smallest exit distance across X, Y, Z
+	tMax = min(tMax, tMax.swizzle<SwZ, SwX, SwY, SwU>());
+
+	auto noParallelOverlap = (ray.origin < aabb.getMin()) | (ray.origin > aabb.getMax()); // per axis: origin lies outside the box range
+	auto noIntersection = (tMin > tMax) | (tMax < simd_f32_4::zero) | (ray.getParallel() & noParallelOverlap); // entry after exit, exit behind the origin, or flagged axis with origin outside its range
+	noIntersection |= noIntersection.splatY(); // fold the Y and Z lanes into X, the only lane read below
+	noIntersection |= noIntersection.splatZ();
+	return select(noIntersection, simd_f32_4::max, tMin).getX(); // only the entry distance is returned; simd_f32_4::max marks a miss
+}
 /**
- * @brief Calculates where ray intersects the AABB. (Ray is inversed!)
- * @return Distance to the first intersection point.
+ * @brief Calculates where ray intersects the AABB in 3D space.
+ * @return Distance to the first intersection point, or FLT_MAX if no hit.
  *
  * @param[in] aabb target AABB to raycast
- * @param[in] ray target ray in the space (inversed)
+ * @param ray target ray in the space
  */
-float raycast1I(const Aabb& aabb, const Ray& ray) noexcept;
+static float raycast1(const Aabb& aabb, Ray ray) noexcept
+{
+	ray.setDirection(simd_f32_4::one / ray.getDirection());
+	return raycast1I(aabb, ray);
+}
 
 /**
  * @brief Returns true if ray is intersecting the AABB.
@@ -286,7 +357,7 @@ static constexpr bool isAabbIntersected(float2 raycastDists) noexcept
 }
 
 /**
- * @brief Returns true if ray intersects the AABB.
+ * @brief Returns true if ray intersects the AABB in 3D space.
  * 
  * @param[in] aabb target AABB to raycast
  * @param[in] ray target ray in the space
@@ -314,18 +385,18 @@ static bool raycastI(const Aabb& aabb, const Ray& ray) noexcept
  */
 static bool isIntersected(const Aabb& a, const Aabb& b) noexcept
 {
-	return areAllTrue(a.getMin() <= b.getMax()) && areAllTrue(b.getMin() <= a.getMax());
+	return areAllTrue(a.getMin() <= b.getMax() & a.getMax() >= b.getMin()); // boxes overlap when each one's min does not exceed the other's max
 }
 
 /**
  * @brief Returns true if triangle intersects the AABB.
  * @details Usefull for a fast 3D model voxelization.
  *
- * @param[in] center target AABB center point in the space
- * @param[in] extent target AABB extent (half size)
+ * @param position target AABB position in 3D space
+ * @param extent target AABB extent (half size)
  * @param[in] triangle target triangle in the space
  */
-bool isAabbIntersected(const float3& center, float3 extent, const Triangle& triangle) noexcept;
+bool isAabbIntersected(simd_f32_4 position, simd_f32_4 extent, const Triangle& triangle) noexcept;
 
 /**
  * @brief Returns true if AABB is behind the frustum planes.
@@ -339,9 +410,35 @@ bool isAabbIntersected(const float3& center, float3 extent, const Triangle& tria
  * @param[in] aabb target AABB to check
  * @param[in] model AABB transformation matrix
  * @param[in] planes target frustum planes
- * @param[in] planeCount frustum plane count
+ * @param planeCount frustum plane count
  */
-bool isBehindFrustum(const Aabb& aabb, const float4x4& model,
-	const Plane* planes, uint8 planeCount = Plane::frustumCount) noexcept;
+static bool isBehindFrustum(const Aabb& aabb, const simd_f32_4x4& model,
+	const Plane* planes, uint8 planeCount = Plane::frustumCount) noexcept
+{
+	auto min = aabb.getMin(), max = aabb.getMax();
+	auto minX = min.getX(), minY = min.getY(), minZ = min.getZ();
+	auto maxX = max.getX(), maxY = max.getY(), maxZ = max.getZ();
+
+	auto v0 = model * simd_f32_4(min, 1.0f); // v0..v7: the eight box corners with w = 1, multiplied by the model matrix
+	auto v1 = model * simd_f32_4(minX, minY, maxZ, 1.0f);
+	auto v2 = model * simd_f32_4(minX, maxY, minZ, 1.0f);
+	auto v3 = model * simd_f32_4(minX, maxY, maxZ, 1.0f);
+	auto v4 = model * simd_f32_4(maxX, minY, minZ, 1.0f);
+	auto v5 = model * simd_f32_4(maxX, minY, maxZ, 1.0f);
+	auto v6 = model * simd_f32_4(maxX, maxY, minZ, 1.0f);
+	auto v7 = model * simd_f32_4(max, 1.0f);
+
+	for (uint8 i = 0; i < planeCount; i++)
+	{
+		auto plane = planes[i];
+		auto d0 = simd_f32_4(distance3(plane, v0), distance3(plane, v1), distance3(plane, v2), distance3(plane, v3)); // distance3() results for corners 0-3 packed into one vector
+		auto d1 = simd_f32_4(distance3(plane, v4), distance3(plane, v5), distance3(plane, v6), distance3(plane, v7)); // TODO: use simd_f32_8?
+		
+		if (areAllTrue(d0 < 0.0f & d1 < 0.0f)) // all eight corners have a negative distance to this single plane
+			return true;
+	}
+
+	return false; // no plane had every corner on its negative side
+}
 
 } // namespace math
\ No newline at end of file
diff --git a/include/math/angles.hpp b/include/math/angles.hpp
index b3d4033..4700eb4 100644
--- a/include/math/angles.hpp
+++ b/include/math/angles.hpp
@@ -37,26 +37,22 @@ static constexpr float radians(float degrees) noexcept { return degrees * ((floa
  * @brief Converts specified vector in degrees to radians.
  * @param degrees target vector in degrees
  */
-static constexpr float2 radians(float2 degrees) noexcept
-{
-	return float2(radians(degrees.x), radians(degrees.y));
-}
+static constexpr float2 radians(float2 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); }
 /**
  * @brief Converts specified vector in degrees to radians.
  * @param degrees target vector in degrees
  */
-static constexpr float3 radians(float3 degrees) noexcept
-{
-	return float3(radians(degrees.x), radians(degrees.y), radians(degrees.z));
-}
+static constexpr float3 radians(float3 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); }
 /**
  * @brief Converts specified vector in degrees to radians.
  * @param degrees target vector in degrees
  */
-static constexpr float4 radians(float4 degrees) noexcept
-{
-	return float4(radians(degrees.x), radians(degrees.y), radians(degrees.z), radians(degrees.w));
-}
+static constexpr float4 radians(float4 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); }
+/**
+ * @brief Converts specified SIMD vector in degrees to radians.
+ * @param degrees target SIMD vector in degrees
+ */
+static simd_f32_4 radians(simd_f32_4 degrees) noexcept { return degrees * ((float)M_PI / 180.0f); }
 
 /**
  * @brief Converts specified value in radians to degrees.
@@ -67,25 +63,21 @@ static constexpr float degrees(float radians) noexcept { return radians * (180.0
  * @brief Converts specified vector in radians to degrees.
  * @param degrees target vector in radians
  */
-static constexpr float2 degrees(float2 radians) noexcept
-{
-	return float2(degrees(radians.x), degrees(radians.y));
-}
+static constexpr float2 degrees(float2 radians) noexcept { return radians * (180.0f / (float)M_PI); }
 /**
  * @brief Converts specified vector in radians to degrees.
  * @param degrees target vector in radians
  */
-static constexpr float3 degrees(float3 radians) noexcept
-{
-	return float3(degrees(radians.x), degrees(radians.y), degrees(radians.z));
-}
+static constexpr float3 degrees(float3 radians) noexcept { return radians * (180.0f / (float)M_PI); }
 /**
  * @brief Converts specified vector in radians to degrees.
  * @param degrees target vector in radians
  */
-static constexpr float4 degrees(float4 radians) noexcept
-{
-	return float4(degrees(radians.x), degrees(radians.y), degrees(radians.z), degrees(radians.w));
-}
+static constexpr float4 degrees(float4 radians) noexcept { return radians * (180.0f / (float)M_PI); }
+/**
+ * @brief Converts specified SIMD vector in radians to degrees.
+ * @param radians target SIMD vector in radians
+ */
+static simd_f32_4 degrees(simd_f32_4 radians) noexcept { return radians * (180.0f / (float)M_PI); }
 
 }; // namespace math
\ No newline at end of file
diff --git a/include/math/bvh.hpp b/include/math/bvh.hpp
index 96b1c58..9703441 100644
--- a/include/math/bvh.hpp
+++ b/include/math/bvh.hpp
@@ -38,38 +38,6 @@ namespace math
  */
 struct Bvh
 {
-	/**
-	 * @brief BVH hierarchy base node container.
-	 * @details See the @ref Bvh::Node.
-	 */
-	struct BaseNode
-	{
-		float3 min = float3::zero;
-		uint32 primitiveCount = 0;
-		float3 max = float3::zero;
-	};
-	/**
-	 * @brief BVH hierarchy child node container.
-	 * @details See the @ref Bvh::Node.
-	 */
-	struct ChildNode
-	{
-		float3 min = float3::zero;
-		uint32 primitiveCount = 0;
-		float3 max = float3::zero;
-		uint32 leftNode = 0;
-	};
-	/**
-	 * @brief BVH hierarchy leaf node container.
-	 * @details See the @ref Bvh::Node.
-	 */
-	struct LeafNode
-	{
-		float3 min = float3::zero;
-		uint32 primitiveCount = 0;
-		float3 max = float3::zero;
-		uint32 firstPrimitive = 0;
-	};
 	/**
 	 * @brief BVH hierarchy node container.
 	 * 
@@ -80,29 +48,57 @@ struct Bvh
 	 */
 	union Node
 	{
-		BaseNode base;
-		ChildNode child;
-		LeafNode leaf;
-		constexpr Node() : leaf() { }
+		Aabb aabb = {};
 		
 		/**
-		 * @brief Returns true if this is leaf node. (Contain the actual geometric objects)
+		 * @brief Returns this leaf BVH node primitive count.
 		 */
-		bool isLeaf() const noexcept { return base.primitiveCount; }
+		uint32 getPrimitiveCount() const noexcept { return aabb.getMin().uints.w; }
 		/**
-		 * @brief Returns node Axis Aligned Bounding Box. (AABB)
+		 * @brief Sets this leaf BVH node primitive count.
+		 * @param primitiveCount target leaf node primitive count
 		 */
-		Aabb getAabb() const noexcept { return Aabb(base.min, base.max); }
+		void setPrimitiveCount(uint32 primitiveCount) noexcept
+		{
+			auto min = aabb.getMin();
+			min.uints.w = primitiveCount;
+			aabb.setMin(min);
+		}
+
+		/**
+		 * @brief Returns this BVH node left child node identifier. (Read as raw integer bits from the max corner W lane)
+		 */
+		uint32 getLeftNode() const noexcept { return aabb.getMax().uints.w; }
+		/**
+		 * @brief Sets this BVH node left child node identifier.
+		 * @param nodeID target node left child node identifier
+		 */
+		void setLeftNode(uint32 nodeID) noexcept
+		{
+			auto max = aabb.getMax();
+			max.uints.w = nodeID;
+			aabb.setMax(max);
+		}
+
 		/**
-		 * @brief Sets node Axis Aligned Bounding Box. (AABB)
-		 * @param[in] aabb target aabb value
+		 * @brief Returns this leaf BVH node first primitive index.
+		 */
+		uint32 getFirstPrimitive() const noexcept { return getLeftNode(); }
+		/**
+		 * @brief Sets this leaf BVH node first primitive index.
+		 * @param primitiveIndex target node first primitive index
+		 */
+		void setFirstPrimitive(uint32 primitiveIndex) noexcept { setLeftNode(primitiveIndex); }
+
+		/**
+		 * @brief Returns true if this is leaf node. (Contain the actual geometric objects)
 		 */
-		void setAabb(const Aabb& aabb) noexcept { base.min = aabb.getMin(), base.max = aabb.getMax(); }
+		bool isLeaf() const noexcept { return getPrimitiveCount() != 0; } // count is stored as raw integer bits, so test it as an integer rather than as a float lane
 	};
 protected:
 	vector<Node> nodes;
 	vector<uint32> primitives;
-	vector<float3> centroids;
+	vector<simd_f32_4> centroids;
 	stack<Node*> nodeStack;
 public:
 	/*******************************************************************************************************************
@@ -117,7 +113,7 @@ struct Bvh
 	 * @param centroids precalculated centroid array
 	 */
 	Bvh(const uint8* vertices, const uint8* indices, const Aabb& aabb, uint32 indexCount,
-		uint32 vertexSize, uint32 indexSize, const float3* centroids = nullptr);
+		uint32 vertexSize, uint32 indexSize, const simd_f32_4* centroids = nullptr);
 	/**
 	 * @brief Creates a new BVH from the AABB array.
 	 *
@@ -126,7 +122,7 @@ struct Bvh
 	 * @param aabbCount AABB array size
 	 * @param centroids precalculated centroid array
 	 */
-	Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* centroids = nullptr);
+	Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* centroids = nullptr);
 	/**
 	 * @brief Creates a new empty BVH.
 	 */
@@ -143,7 +139,7 @@ struct Bvh
 	/**
 	 * @brief Returns BVH centroid array.
 	 */
-	const vector<float3>& getCentroids() const noexcept { return centroids; }
+	const vector<simd_f32_4>& getCentroids() const noexcept { return centroids; }
 
 	/**
 	 * @brief Recreates BVH from the triangle array.
@@ -157,7 +153,7 @@ struct Bvh
 	 * @param centroids precalculated centroid array or null
 	 */
 	void recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb, uint32 indexCount,
-		uint32 vertexSize, uint32 indexSize, const float3* centroids = nullptr);
+		uint32 vertexSize, uint32 indexSize, const simd_f32_4* centroids = nullptr);
 	/**
 	 * @brief Recreates BVH from the AABB array.
 	 *
@@ -166,7 +162,7 @@ struct Bvh
 	 * @param aabbCount AABB array size
 	 * @param centroids precalculated centroid array or null
 	 */
-	void recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* centroids = nullptr);
+	void recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* centroids = nullptr);
 
 	// TODO: add traverse function
 };
diff --git a/include/math/color-space.hpp b/include/math/color-space.hpp
index 5447736..17e3d04 100644
--- a/include/math/color-space.hpp
+++ b/include/math/color-space.hpp
@@ -24,85 +24,90 @@ namespace math
 {
 
 /**
- * @brief Converts linear RGB color to the sRGB color space.
- * @param rgb target linear RGB color
+ * @brief Converts linear RGBA color to the sRGB color space.
+ * @param rgba target linear RGBA color
  */
-static float3 rgbToSrgb(float3 rgb) noexcept
+static simd_f32_4 rgbToSrgb(simd_f32_4 rgba) noexcept
 {
-	constexpr auto c = 0.0031308f;
-	auto l = fma(pow(rgb, float3(1.0f / 2.4f)), float3(1.055f), float3(-0.055f));
-	auto h = rgb * 12.92f;
-	return float3(rgb.x <= c ? h.x : l.x, rgb.y <= c ? h.y : l.y, rgb.z <= c ? h.z : l.z);
+	static const auto c = simd_f32_4(0.0031308f);
+	auto h = rgba * 12.92f;
+	auto l = fma(pow(rgba, simd_f32_4(1.0f / 2.4f)), simd_f32_4(1.055f), simd_f32_4(-0.055f));
+	auto r = select(rgba <= c, h, l); r.setW(rgba.getW());
+	return r;
 }
 /**
- * @brief Converts sRGB color to the linear RGB color space.
+ * @brief Converts sRGB color to the linear RGBA color space.
  * @param srgb target sRGB color
  */
-static float3 srgbToRgb(float3 srgb) noexcept
+static simd_f32_4 srgbToRgb(simd_f32_4 sRGB) noexcept
 {
-	constexpr auto c = 0.04045f;
-	auto l = pow((srgb + 0.055f) * (1.0f / 1.055f), float3(2.4f));
-	auto h = srgb * (1.0f / 12.92f);
-	return float3(srgb.x <= c ? h.x : l.x, srgb.y <= c ? h.y : l.y, srgb.z <= c ? h.z : l.z);
+	static const auto c = simd_f32_4(0.04045f);
+	auto h = sRGB * (1.0f / 12.92f);
+	auto l = pow((sRGB + 0.055f) * (1.0f / 1.055f), simd_f32_4(2.4f));
+	auto r = select(sRGB <= c, h, l); r.setW(sRGB.getW());
+	return r;
 }
 
 /**
  * @brief Converts linear RGB color to the XYZ color space. (CIE 1931)
- * @param[in] rgb target linear RGB color
+ * @param rgb target linear RGB color
  */
-static float3 rgbToXyz(const float3& rgb) noexcept
+static simd_f32_4 rgbToXyz(simd_f32_4 rgb) noexcept
 {
-	constexpr auto m = float3x3
+	static const auto m = simd_f32_4x4
 	(
-		0.4124564f, 0.2126729f, 0.0193339f,
-		0.3575761f, 0.7151522f, 0.1191920f,
-		0.1804375f, 0.0721750f, 0.9503041f
+		simd_f32_4(0.4124564f, 0.2126729f, 0.0193339f, 0.0f),
+		simd_f32_4(0.3575761f, 0.7151522f, 0.1191920f, 0.0f),
+		simd_f32_4(0.1804375f, 0.0721750f, 0.9503041f, 0.0f),
+		simd_f32_4::zero
 	);
-	return m * rgb;
+	return multiply3x3(m, rgb);
 }
 /**
  * @brief Converts XYZ color to the linear RGB color space. (CIE 1931)
- * @param[in] xyz target XYZ color
+ * @param xyz target XYZ color
  */
-static float3 xyzToRgb(const float3& xyz) noexcept
+static simd_f32_4 xyzToRgb(simd_f32_4 xyz) noexcept
 {
-	constexpr auto m = float3x3
+	static const auto m = simd_f32_4x4
 	(
-		 3.2404542f, -0.9692660f,  0.0556434f,
-		-1.5371385f,  1.8760108f, -0.2040259f,
-		-0.4985314f,  0.0415560f,  1.0572252f
+		simd_f32_4( 3.2404542f, -0.9692660f,  0.0556434f, 0.0f),
+		simd_f32_4(-1.5371385f,  1.8760108f, -0.2040259f, 0.0f),
+		simd_f32_4(-0.4985314f,  0.0415560f,  1.0572252f, 0.0f),
+		simd_f32_4::zero
 	);
-	return m * xyz;
+	return multiply3x3(m, xyz);
 }
 
 /**
  * @brief Converts XYZ color to the YXY color space.
  * @param xyz target XYZ color
  */
-static float3 xyzToYxy(float3 xyz) noexcept
+static simd_f32_4 xyzToYxy(simd_f32_4 xyz) noexcept
 {
-	auto inv = 1.0f / dot(xyz, float3::one);
-	return float3(xyz.y, xyz.x * inv, xyz.y * inv);
+	auto y = xyz.getY();
+	auto inv = 1.0f / dot3(xyz, simd_f32_4::one);
+	return simd_f32_4(y, xyz.getX() * inv, y * inv);
 }
 /**
  * @brief Converts YXY color to the XYZ color space.
  * @param yxy target YXY color
  */
-static float3 yxyToXyz(float3 yxy) noexcept
+static simd_f32_4 yxyToXyz(simd_f32_4 yxy) noexcept
 {
-	return float3(yxy.x * yxy.y / yxy.z, yxy.x, 
-		yxy.x * (1.0f - yxy.y - yxy.z) / yxy.z);
+	auto x = yxy.getX(), y = yxy.getY(), z = yxy.getZ();
+	return simd_f32_4(x * y / z, x, x * (1.0f - y - z) / z);
 }
 
 /**
  * @brief Converts linear RGB color to the YXY color space.
- * @param[in] rgb target linear RGB color
+ * @param rgb target linear RGB color
  */
-static float3 rgbToYxy(const float3& rgb) noexcept { return xyzToYxy(rgbToXyz(rgb)); }
+static simd_f32_4 rgbToYxy(simd_f32_4 rgb) noexcept { return xyzToYxy(rgbToXyz(rgb)); }
 /**
  * @brief Converts YXY color to the linear RGB color space.
- * @param[in] yxy target YXY color
+ * @param yxy target YXY color
  */
-static float3 yxyToRgb(const float3& yxy) noexcept { return xyzToRgb(yxyToXyz(yxy)); }
+static simd_f32_4 yxyToRgb(simd_f32_4 yxy) noexcept { return xyzToRgb(yxyToXyz(yxy)); }
 
 } // namespace math
\ No newline at end of file
diff --git a/include/math/color.hpp b/include/math/color.hpp
index 253ec99..95dcbcd 100644
--- a/include/math/color.hpp
+++ b/include/math/color.hpp
@@ -69,24 +69,14 @@ struct [[nodiscard]] Color
 			a = toInt32(string(hex.c_str() + 6, 2));
 	}
 	
-	/**
-	 * @brief Creates a new sRGB color structure from the normalized R channel. (Red)
-	 * @param normR target normalized R and G channel color values
-	 */
-	constexpr explicit Color(float normR)
-	{
-		r = (uint8)(std::clamp(normR, 0.0f, 1.0f) * 255.0f + 0.5f);
-		g = b = a = 0;
-	}
 	/**
 	 * @brief Creates a new sRGB color structure from the normalized R and G channels. (Red, Green)
 	 * @param normRg target normalized R and G channel color values
 	 */
 	constexpr explicit Color(float2 normRg)
 	{
-		r = (uint8)(std::clamp(normRg.x, 0.0f, 1.0f) * 255.0f + 0.5f);
-		g = (uint8)(std::clamp(normRg.y, 0.0f, 1.0f) * 255.0f + 0.5f);
-		b = a = 0;
+		auto c = clamp(normRg, float2(0.0f), float2(1.0f)) * 255.0f + 0.5f;
+		r = (uint8)c.x, g = (uint8)c.y, b = a = 0;
 	}
 	/**
 	 * @brief Creates a new sRGB color structure from the normalized RGB channels. (Red, Green, Blue)
@@ -94,10 +84,8 @@ struct [[nodiscard]] Color
 	 */
 	constexpr explicit Color(float3 normRgb)
 	{
-		r = (uint8)(std::clamp(normRgb.x, 0.0f, 1.0f) * 255.0f + 0.5f);
-		g = (uint8)(std::clamp(normRgb.y, 0.0f, 1.0f) * 255.0f + 0.5f);
-		b = (uint8)(std::clamp(normRgb.z, 0.0f, 1.0f) * 255.0f + 0.5f);
-		a = 0;
+		auto c = clamp(normRgb, float3(0.0f), float3(1.0f)) * 255.0f + 0.5f;
+		r = (uint8)c.x, g = (uint8)c.y, b = (uint8)c.z, a = 0;
 	}
 	/**
 	 * @brief Creates a new sRGB color structure from the normalized RGBA channels. (Red, Green, Blue, Alpha)
@@ -105,36 +93,35 @@ struct [[nodiscard]] Color
 	 */
 	constexpr explicit Color(float4 normRgba)
 	{
-		r = (uint8)(std::clamp(normRgba.x, 0.0f, 1.0f) * 255.0f + 0.5f);
-		g = (uint8)(std::clamp(normRgba.y, 0.0f, 1.0f) * 255.0f + 0.5f);
-		b = (uint8)(std::clamp(normRgba.z, 0.0f, 1.0f) * 255.0f + 0.5f);
-		a = (uint8)(std::clamp(normRgba.w, 0.0f, 1.0f) * 255.0f + 0.5f);
+		auto c = clamp(normRgba, float4(0.0f), float4(1.0f)) * 255.0f + 0.5f;
+		r = (uint8)c.x, g = (uint8)c.y, b = (uint8)c.z, a = (uint8)c.w;
+	}
+	/**
+	 * @brief Creates a new sRGB color structure from the normalized RGBA channels. (Red, Green, Blue, Alpha)
+	 * @param normRgba target normalized RGBA channel color SIMD values
+	 */
+	explicit Color(simd_f32_4 normRgba)
+	{
+		auto c = clamp(normRgba, simd_f32_4::zero, simd_f32_4::one) * 255.0f + 0.5f;
+		r = (uint8)c.getX(), g = (uint8)c.getY(), b = (uint8)c.getZ(), a = (uint8)c.getW();
 	}
 	
 	/*******************************************************************************************************************
 	 * @brief Converts sRGB color to the normalized RG vector. (Red, Green)
 	 */
-	constexpr explicit operator float2() const noexcept
-	{
-		constexpr auto mul = (1.0f / 255.0f);
-		return float2(r * mul, g * mul);
-	}
+	constexpr explicit operator float2() const noexcept { return float2(r, g) * (1.0f / 255.0f); }
 	/**
 	 * @brief Converts sRGB color to the normalized RGB vector. (Red, Green, Blue)
 	 */
-	constexpr explicit operator float3() const noexcept
-	{
-		constexpr auto mul = (1.0f / 255.0f);
-		return float3(r * mul, g * mul, b * mul);
-	}
+	constexpr explicit operator float3() const noexcept { return float3(r, g, b) * (1.0f / 255.0f); }
 	/**
 	 * @brief Converts sRGB color to the normalized RGBA vector. (Red, Green, Blue, Alpha)
 	 */
-	constexpr explicit operator float4() const noexcept
-	{
-		constexpr auto mul = (1.0f / 255.0f);
-		return float4(r * mul, g * mul, b * mul, a * mul);
-	}
+	constexpr explicit operator float4() const noexcept { return float4(r, g, b, a) * (1.0f / 255.0f); }
+	/**
+	 * @brief Converts sRGB color to the normalized RGBA SIMD vector. (Red, Green, Blue, Alpha)
+	 */
+	explicit operator simd_f32_4() const noexcept { return simd_f32_4(r, g, b, a) * (1.0f / 255.0f); }
 	/**
 	 * @brief Returns color binary data.
 	 */
@@ -219,31 +206,12 @@ struct [[nodiscard]] Color
 	/**
 	 * @brief Converts sRGB color to the normalized linear RGB color space.
 	 */
-	float4 toLinear4() const noexcept
-	{
-		auto srgb = (float4)*this;
-		return float4(srgbToRgb((float3)srgb), srgb.w);
-	}
-	/**
-	 * @brief Converts sRGB color to the normalized linear RGB color space.
-	 */
-	float3 toLinear3() const noexcept { return srgbToRgb((float3)*this); }
+	simd_f32_4 toLinear() const noexcept { return srgbToRgb((simd_f32_4)*this); }
 
 	/**
 	 * @brief Converts normalized linear RGB color to the sRGB color space.
 	 */
-	static Color fromLinear4(const float4& normRGBA) noexcept
-	{
-		auto srgb = rgbToSrgb((float3)normRGBA);
-		return Color(float4(srgb, normRGBA.w));
-	}
-	/**
-	 * @brief Converts normalized linear RGB color to the sRGB color space.
-	 */
-	static Color fromLinear3(const float3& normRGB) noexcept
-	{
-		return Color(rgbToSrgb(normRGB));
-	}
+	static Color fromLinear(simd_f32_4 normRGBA) noexcept { return Color(rgbToSrgb(normRGBA)); }
 
 	//******************************************************************************************************************
 	constexpr bool operator==(Color c) const noexcept { return r == c.r && g == c.g && b == c.b && a == c.a; }
diff --git a/include/math/line.hpp b/include/math/line.hpp
index 82c6ef7..1dba932 100644
--- a/include/math/line.hpp
+++ b/include/math/line.hpp
@@ -31,16 +31,16 @@ namespace math
 struct [[nodiscard]] Line
 {
 	/**
-	 * @brief Line start point in the space.
+	 * @brief Line start point in 3D space.
 	 */
 	simd_f32_4 start = simd_f32_4::zero;
 	/**
-	 * @brief Line end point in the space.
+	 * @brief Line end point in 3D space.
 	 */
 	simd_f32_4 end = simd_f32_4::zero;
 
 	/**
-	 * @brief Creates a new line structure.
+	 * @brief Creates a new line structure. (In 3D space)
 	 * 
 	 * @param start target line start point in the space
 	 * @param end target line end point in the space
@@ -52,7 +52,7 @@ struct [[nodiscard]] Line
 	Line() = default;
 
 	/**
-	 * @brief Returns line direction vector.
+	 * @brief Returns line direction vector in 3D space.
 	 * @param normalize is direction vector should be normalized
 	 */
 	simd_f32_4 getDirection(bool normalize = true) const noexcept
@@ -71,10 +71,10 @@ struct [[nodiscard]] Line
 };
 
 /**
- * @brief Returns closest point on line to the specified one.
+ * @brief Returns closest point on line to the specified one in 3D space.
  * 
  * @param[in] line target line to use
- * @param point target point in the space
+ * @param point target point in 3D space
  */
 static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point) noexcept
 {
@@ -83,10 +83,10 @@ static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point) noexcept
 	return fma(d, simd_f32_4(std::clamp(t, 0.0f, 1.0f)), a);
 }
 /**
- * @brief Returns closest point on line to the specified one.
+ * @brief Returns closest point on line to the specified one in 3D space.
  *
  * @param[in] line target line to use
- * @param point target point in the space
+ * @param point target point in 3D space
  * @param[out] t distance to the closest point
  */
 static simd_f32_4 closestPoint(const Line& line, simd_f32_4 point, float& t) noexcept
diff --git a/include/math/matrix/float.hpp b/include/math/matrix/float.hpp
index 3e77c20..ed72adf 100644
--- a/include/math/matrix/float.hpp
+++ b/include/math/matrix/float.hpp
@@ -18,7 +18,7 @@
  * 
  * Coordinate system: X-right, Y-up, Z-forward (Left handed)
  * Matrices order: Column-major. (OpenGL, Vulkan like)
- * floatNxM - N columns and M rows
+ * floatCxR - matrix with C columns and R rows
  * 
  * Based on this project: https://github.com/g-truc/glm
  */
@@ -40,9 +40,9 @@ struct [[nodiscard]] float2x2
 
 	/**
 	 * @brief Creates a new floating point 2x2 matrix structure.
-	 * @param v target value for all columns and rows
+	 * @param n target value for all columns and rows
 	 */
-	constexpr explicit float2x2(float v = 0.0f) noexcept : c0(v), c1(v) { }
+	constexpr explicit float2x2(float n = 0.0f) noexcept : c0(n), c1(n) { }
 	/**
 	 * @brief Creates a new floating point 2x2 matrix structure.
 	 * 
@@ -79,14 +79,14 @@ struct [[nodiscard]] float2x2
 		return ((float2*)this)[i];
 	}
 
-	constexpr float2x2 operator+(float v) const noexcept { return float2x2(c0 + v, c1 + v); }
-	constexpr float2x2 operator-(float v) const noexcept { return float2x2(c0 - v, c1 - v); }
-	constexpr float2x2 operator*(float v) const noexcept { return float2x2(c0 * v, c1 * v); }
-	constexpr float2x2 operator/(float v) const noexcept { return float2x2(c0 / v, c1 / v); }
-	float2x2& operator+=(float v) noexcept { c0 += v; c1 += v; return *this; }
-	float2x2& operator-=(float v) noexcept { c0 -= v; c1 -= v; return *this; }
-	float2x2& operator*=(float v) noexcept { c0 *= v; c1 *= v; return *this; }
-	float2x2& operator/=(float v) noexcept { c0 /= v, c1 /= v; return *this; }
+	constexpr float2x2 operator+(float n) const noexcept { return float2x2(c0 + n, c1 + n); }
+	constexpr float2x2 operator-(float n) const noexcept { return float2x2(c0 - n, c1 - n); }
+	constexpr float2x2 operator*(float n) const noexcept { return float2x2(c0 * n, c1 * n); }
+	constexpr float2x2 operator/(float n) const noexcept { return float2x2(c0 / n, c1 / n); }
+	float2x2& operator+=(float n) noexcept { c0 += n; c1 += n; return *this; }
+	float2x2& operator-=(float n) noexcept { c0 -= n; c1 -= n; return *this; }
+	float2x2& operator*=(float n) noexcept { c0 *= n; c1 *= n; return *this; }
+	float2x2& operator/=(float n) noexcept { c0 /= n, c1 /= n; return *this; }
 
 	/**
 	 * @brief Calculates dot product between two matrices.
@@ -147,9 +147,9 @@ struct [[nodiscard]] float3x3
 
 	/**
 	 * @brief Creates a new floating point 3x3 matrix structure.
-	 * @param v target value for all columns and rows
+	 * @param n target value for all columns and rows
 	 */
-	constexpr explicit float3x3(float v = 0.0f) noexcept : c0(v), c1(v), c2(v) { }
+	constexpr explicit float3x3(float n = 0.0f) noexcept : c0(n), c1(n), c2(n) { }
 	/**
 	 * @brief Creates a new floating point 3x3 matrix structure.
 	 *
@@ -194,14 +194,14 @@ struct [[nodiscard]] float3x3
 		return ((float3*)this)[i];
 	}
 
-	constexpr float3x3 operator+(float v) const noexcept { return float3x3(c0 + v, c1 + v, c2 + v); }
-	constexpr float3x3 operator-(float v) const noexcept { return float3x3(c0 - v, c1 - v, c2 - v); }
-	constexpr float3x3 operator*(float v) const noexcept { return float3x3(c0 * v, c1 * v, c2 * v); }
-	constexpr float3x3 operator/(float v) const noexcept { return float3x3(c0 / v, c1 / v, c2 / v); }
-	float3x3& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; return *this; }
-	float3x3& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; return *this; }
-	float3x3& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; return *this; }
-	float3x3& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; return *this; }
+	constexpr float3x3 operator+(float n) const noexcept { return float3x3(c0 + n, c1 + n, c2 + n); }
+	constexpr float3x3 operator-(float n) const noexcept { return float3x3(c0 - n, c1 - n, c2 - n); }
+	constexpr float3x3 operator*(float n) const noexcept { return float3x3(c0 * n, c1 * n, c2 * n); }
+	constexpr float3x3 operator/(float n) const noexcept { return float3x3(c0 / n, c1 / n, c2 / n); }
+	float3x3& operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; return *this; }
+	float3x3& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; return *this; }
+	float3x3& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; return *this; }
+	float3x3& operator/=(float n) noexcept { c0 /= n, c1 /= n; c2 /= n; return *this; }
 
 	/**
 	 * @brief Calculates dot product between two matrices.
@@ -266,15 +266,16 @@ struct [[nodiscard]] float4x3
 
 	/**
 	 * @brief Creates a new floating point 4x3 matrix structure.
-	 * @param v target value for all columns and rows
+	 * @param n target value for all columns and rows
 	 */
-	constexpr explicit float4x3(float v = 0.0f) noexcept : c0(v), c1(v), c2(v), c3(v) { }
+	constexpr explicit float4x3(float n = 0.0f) noexcept : c0(n), c1(n), c2(n), c3(n) { }
 	/**
 	 * @brief Creates a new floating point 4x3 matrix structure.
 	 *
 	 * @param[in] c0 first matrix column value
 	 * @param[in] c1 second matrix column value
 	 * @param[in] c2 third matrix column value
+	 * @param[in] c3 fourth matrix column value
 	 */
 	constexpr float4x3(const float3& c0, const float3& c1, const float3& c2, const float3& c3)
 		noexcept : c0(c0), c1(c1), c2(c2), c3(c3) { }
@@ -319,14 +320,14 @@ struct [[nodiscard]] float4x3
 		return ((float3*)this)[i];
 	}
 
-	constexpr float4x3 operator+(float v) const noexcept { return float4x3(c0 + v, c1 + v, c2 + v, c3 + v); }
-	constexpr float4x3 operator-(float v) const noexcept { return float4x3(c0 - v, c1 - v, c2 - v, c3 - v); }
-	constexpr float4x3 operator*(float v) const noexcept { return float4x3(c0 * v, c1 * v, c2 * v, c3 * v); }
-	constexpr float4x3 operator/(float v) const noexcept { return float4x3(c0 / v, c1 / v, c2 / v, c3 / v); }
-	float4x3& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; c3 += v; return *this; }
-	float4x3& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; c3 -= v; return *this; }
-	float4x3& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; c3 *= v; return *this; }
-	float4x3& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; c3 /= v; return *this; }
+	constexpr float4x3 operator+(float n) const noexcept { return float4x3(c0 + n, c1 + n, c2 + n, c3 + n); }
+	constexpr float4x3 operator-(float n) const noexcept { return float4x3(c0 - n, c1 - n, c2 - n, c3 - n); }
+	constexpr float4x3 operator*(float n) const noexcept { return float4x3(c0 * n, c1 * n, c2 * n, c3 * n); }
+	constexpr float4x3 operator/(float n) const noexcept { return float4x3(c0 / n, c1 / n, c2 / n, c3 / n); }
+	float4x3& operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; c3 += n; return *this; }
+	float4x3& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; c3 -= n; return *this; }
+	float4x3& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; c3 *= n; return *this; }
+	float4x3& operator/=(float n) noexcept { c0 /= n, c1 /= n; c2 /= n; c3 /= n; return *this; }
 
 	/**
 	 * @brief Calculates dot product between matrix and vector.
@@ -375,9 +376,9 @@ struct [[nodiscard]] float4x4
 
 	/**
 	 * @brief Creates a new floating point 4x4 matrix structure.
-	 * @param v target value for all columns and rows
+	 * @param n target value for all columns and rows
 	 */
-	constexpr explicit float4x4(float v = 0.0f) noexcept : c0(v), c1(v), c2(v), c3(v) { }
+	constexpr explicit float4x4(float n = 0.0f) noexcept : c0(n), c1(n), c2(n), c3(n) { }
 	/**
 	 * @brief Creates a new floating point 4x4 matrix structure.
 	 *
@@ -391,13 +392,14 @@ struct [[nodiscard]] float4x4
 	/**
 	 * @brief Creates a new floating point 4x4 matrix structure.
 	 *
-	 * @param[in] c0 first matrix column value
-	 * @param[in] c1 second matrix column value
-	 * @param[in] c2 third matrix column value
-	 * @param[in] c3 fourth matrix column value
+	 * @param[in] m4x3 target 4x3 matrix
+	 * @param c0r3 first matrix column fourth component value
+	 * @param c1r3 second matrix column fourth component value
+	 * @param c2r3 third matrix column fourth component value
+	 * @param c3r3 fourth matrix column fourth component value
 	 */
-	constexpr float4x4(const float4x3& m4x3, float c0, float c1, float c2, float c3) noexcept :
-		c0(float4(m4x3.c0, c0)), c1(float4(m4x3.c1, c1)), c2(float4(m4x3.c2, c2)), c3(float4(m4x3.c3, c3)) { }
+	constexpr float4x4(const float4x3& m4x3, float c0r3, float c1r3, float c2r3, float c3r3) noexcept :
+		c0(float4(m4x3.c0, c0r3)), c1(float4(m4x3.c1, c1r3)), c2(float4(m4x3.c2, c2r3)), c3(float4(m4x3.c3, c3r3)) { }
 	/**
 	 * @brief Creates a new floating point 4x4 matrix structure.
 	 * @details See the @ref float4x4.
@@ -416,7 +418,9 @@ struct [[nodiscard]] float4x4
 	 * @brief Returns matrix 4x3 part.
 	 */
 	constexpr explicit operator float4x3() const noexcept
-	{ return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3); }
+	{
+		return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3);
+	}
 	/**
 	 * @brief Returns matrix 3x3 part.
 	 */
@@ -445,14 +449,14 @@ struct [[nodiscard]] float4x4
 		return ((float4*)this)[i];
 	}
 
-	constexpr float4x4 operator+(float v) const noexcept { return float4x4(c0 + v, c1 + v, c2 + v, c3 + v); }
-	constexpr float4x4 operator-(float v) const noexcept { return float4x4(c0 - v, c1 - v, c2 - v, c3 - v); }
-	constexpr float4x4 operator*(float v) const noexcept { return float4x4(c0 * v, c1 * v, c2 * v, c3 * v); }
-	constexpr float4x4 operator/(float v) const noexcept { return float4x4(c0 / v, c1 / v, c2 / v, c3 / v); }
-	float4x4& operator+=(float v) noexcept { c0 += v; c1 += v; c2 += v; c3 += v; return *this; }
-	float4x4& operator-=(float v) noexcept { c0 -= v; c1 -= v; c2 -= v; c3 -= v; return *this; }
-	float4x4& operator*=(float v) noexcept { c0 *= v; c1 *= v; c2 *= v; c3 *= v; return *this; }
-	float4x4& operator/=(float v) noexcept { c0 /= v, c1 /= v; c2 /= v; c3 /= v; return *this; }
+	constexpr float4x4 operator+(float n) const noexcept { return float4x4(c0 + n, c1 + n, c2 + n, c3 + n); }
+	constexpr float4x4 operator-(float n) const noexcept { return float4x4(c0 - n, c1 - n, c2 - n, c3 - n); }
+	constexpr float4x4 operator*(float n) const noexcept { return float4x4(c0 * n, c1 * n, c2 * n, c3 * n); }
+	constexpr float4x4 operator/(float n) const noexcept { return float4x4(c0 / n, c1 / n, c2 / n, c3 / n); }
+	float4x4& operator+=(float n) noexcept { c0 += n; c1 += n; c2 += n; c3 += n; return *this; }
+	float4x4& operator-=(float n) noexcept { c0 -= n; c1 -= n; c2 -= n; c3 -= n; return *this; }
+	float4x4& operator*=(float n) noexcept { c0 *= n; c1 *= n; c2 *= n; c3 *= n; return *this; }
+	float4x4& operator/=(float n) noexcept { c0 /= n, c1 /= n; c2 /= n; c3 /= n; return *this; }
 
 	/**
 	 * @brief Calculates dot product between two matrices.
@@ -513,49 +517,49 @@ inline const float4x4 float4x4::one = float4x4(1.0f);
 inline const float4x4 float4x4::minusOne = float4x4(-1.0f);
 
 //**********************************************************************************************************************
-static constexpr float4x4 operator+(float v, const float4x4& m) noexcept
+static constexpr float4x4 operator+(float n, const float4x4& m) noexcept
 {
-	return float4x4(v + m.c0, v + m.c1, v + m.c2, v + m.c3);
+	return float4x4(n + m.c0, n + m.c1, n + m.c2, n + m.c3);
 }
-static constexpr float4x4 operator-(float v, const float4x4& m) noexcept
+static constexpr float4x4 operator-(float n, const float4x4& m) noexcept
 {
-	return float4x4(v - m.c0, v - m.c1, v - m.c2, v - m.c3);
+	return float4x4(n - m.c0, n - m.c1, n - m.c2, n - m.c3);
 }
-static constexpr float4x4 operator*(float v, const float4x4& m) noexcept
+static constexpr float4x4 operator*(float n, const float4x4& m) noexcept
 {
-	return float4x4(v * m.c0, v * m.c1, v * m.c2, v * m.c3);
+	return float4x4(n * m.c0, n * m.c1, n * m.c2, n * m.c3);
 }
-static constexpr float4x4 operator/(float v, const float4x4& m) noexcept
+static constexpr float4x4 operator/(float n, const float4x4& m) noexcept
 {
-	return float4x4(v / m.c0, v / m.c1, v / m.c2, v / m.c3);
+	return float4x4(n / m.c0, n / m.c1, n / m.c2, n / m.c3);
 }
 
-static constexpr float4x3 operator+(float v, const float4x3& m) noexcept
+static constexpr float4x3 operator+(float n, const float4x3& m) noexcept
 {
-	return float4x3(v + m.c0, v + m.c1, v + m.c2, v + m.c3);
+	return float4x3(n + m.c0, n + m.c1, n + m.c2, n + m.c3);
 }
-static constexpr float4x3 operator-(float v, const float4x3& m) noexcept
+static constexpr float4x3 operator-(float n, const float4x3& m) noexcept
 {
-	return float4x3(v - m.c0, v - m.c1, v - m.c2, v - m.c3);
+	return float4x3(n - m.c0, n - m.c1, n - m.c2, n - m.c3);
 }
-static constexpr float4x3 operator*(float v, const float4x3& m) noexcept
+static constexpr float4x3 operator*(float n, const float4x3& m) noexcept
 {
-	return float4x3(v * m.c0, v * m.c1, v * m.c2, v * m.c3);
+	return float4x3(n * m.c0, n * m.c1, n * m.c2, n * m.c3);
 }
-static constexpr float4x3 operator/(float v, const float4x3& m) noexcept
+static constexpr float4x3 operator/(float n, const float4x3& m) noexcept
 {
-	return float4x3(v / m.c0, v / m.c1, v / m.c2, v / m.c3);
+	return float4x3(n / m.c0, n / m.c1, n / m.c2, n / m.c3);
 }
 
-static constexpr float3x3 operator+(float v, const float3x3& m) noexcept { return float3x3(v + m.c0, v + m.c1, v + m.c2); }
-static constexpr float3x3 operator-(float v, const float3x3& m) noexcept { return float3x3(v - m.c0, v - m.c1, v - m.c2); }
-static constexpr float3x3 operator*(float v, const float3x3& m) noexcept { return float3x3(v * m.c0, v * m.c1, v * m.c2); }
-static constexpr float3x3 operator/(float v, const float3x3& m) noexcept { return float3x3(v / m.c0, v / m.c1, v / m.c2); }
+static constexpr float3x3 operator+(float n, const float3x3& m) noexcept { return float3x3(n + m.c0, n + m.c1, n + m.c2); }
+static constexpr float3x3 operator-(float n, const float3x3& m) noexcept { return float3x3(n - m.c0, n - m.c1, n - m.c2); }
+static constexpr float3x3 operator*(float n, const float3x3& m) noexcept { return float3x3(n * m.c0, n * m.c1, n * m.c2); }
+static constexpr float3x3 operator/(float n, const float3x3& m) noexcept { return float3x3(n / m.c0, n / m.c1, n / m.c2); }
 
-static constexpr float2x2 operator+(float v, const float2x2& m) noexcept { return float2x2(v + m.c0, v + m.c1); }
-static constexpr float2x2 operator-(float v, const float2x2& m) noexcept { return float2x2(v - m.c0, v - m.c1); }
-static constexpr float2x2 operator*(float v, const float2x2& m) noexcept { return float2x2(v * m.c0, v * m.c1); }
-static constexpr float2x2 operator/(float v, const float2x2& m) noexcept { return float2x2(v / m.c0, v / m.c1); }
+static constexpr float2x2 operator+(float n, const float2x2& m) noexcept { return float2x2(n + m.c0, n + m.c1); }
+static constexpr float2x2 operator-(float n, const float2x2& m) noexcept { return float2x2(n - m.c0, n - m.c1); }
+static constexpr float2x2 operator*(float n, const float2x2& m) noexcept { return float2x2(n * m.c0, n * m.c1); }
+static constexpr float2x2 operator/(float n, const float2x2& m) noexcept { return float2x2(n / m.c0, n / m.c1); }
 
 /***********************************************************************************************************************
  * @brief Calculates dot product between vector and matrix.
@@ -664,7 +668,7 @@ static constexpr float3x3 inverse(float3x3 m) noexcept
 }
 /**
  * @brief Calculates matrix inverse. (Usefull for undoing transformations)
- * @param[in] matrix target matrix to inverse
+ * @param m target matrix to invert
  */
 static float4x4 inverse(float4x4 m) noexcept
 {
diff --git a/include/math/matrix/projection.hpp b/include/math/matrix/projection.hpp
index 41ddd20..d4cb81e 100644
--- a/include/math/matrix/projection.hpp
+++ b/include/math/matrix/projection.hpp
@@ -17,10 +17,6 @@
  * @brief Common projection matrix functions.
  * 
  * @details
- * Coordinate system: X-right, Y-up, Z-forward (Left handed)
- * Matrices order: Column-major. (OpenGL, Vulkan like)
- * floatNxM - N columns and M rows
- * 
  * Projection matrices are fixed for the Vulkan NDC space.
  * http://matthewwellings.com/blog/the-new-vulkan-coordinate-system/
  * 
diff --git a/include/math/matrix/transform.hpp b/include/math/matrix/transform.hpp
index 0788e10..3d3db81 100644
--- a/include/math/matrix/transform.hpp
+++ b/include/math/matrix/transform.hpp
@@ -15,17 +15,11 @@
 /***********************************************************************************************************************
  * @file
  * @brief Common matrix transformation functions.
- * 
- * @details
- * Coordinate system: X-right, Y-up, Z-forward (Left handed)
- * Matrices order: Column-major. (OpenGL, Vulkan like)
- * floatNxM - N columns and M rows
- * 
- * Based on this project: https://github.com/g-truc/glm
+ * @details Based on this project: https://github.com/g-truc/glm
  */
 
 #pragma once
-#include "math/matrix.hpp"
+#include "math/simd/matrix/transform.hpp"
 
 namespace math
 {
@@ -49,16 +43,6 @@ static constexpr void setTranslation(float4x4& m, float3 t) noexcept
 	m.c3 = float4(t, m.c3.w);
 }
 
-/**
- * @brief Applies translation transformation to an object in 3D space. (r = m * translate(t))
- * 
- * @param[in] m target model matrix to translate
- * @param t target object translation
- */
-static constexpr float4x4 translate(const float4x4& m, float3 t) noexcept
-{
-	return float4x4(m.c0, m.c1, m.c2, m.c0 * t.x + m.c1 * t.y + m.c2 * t.z + m.c3);
-}
 /**
  * @brief Creates a new matrix with a specified object position in 3D space.
  * @param t target object translation
@@ -71,22 +55,41 @@ static constexpr float4x4 translate(float3 t) noexcept
 		0.0f, 0.0f, 1.0f, t.z,
 		0.0f, 0.0f, 0.0f, 1.0f);
 }
-
-
-
 /**
- * @brief Adds translation of an object in 3D space to the matrix.
+ * @brief Applies translation transformation to an object in 3D space. [r = m * translate(t)]
  * 
- * @param[out] m target model matrix to use
- * @param t target object position to add
+ * @param[in] m target model matrix to translate
+ * @param t target object translation
  */
-static constexpr void addTranslation(float4x4& m, float3 t) noexcept
+static constexpr float4x4 translate(const float4x4& m, float3 t) noexcept
 {
-	m.c3 = float4((float3)m.c3 + t, m.c3.w);
+	return float4x4(m.c0, m.c1, m.c2, m.c0 * t.x + m.c1 * t.y + m.c2 * t.z + m.c3);
+}
+/**
+ * @brief Applies translation transformation to an object in 3D space. [r = translate(t) * m]
+ * 
+ * @param t target object translation
+ * @param[in] m target model matrix to translate
+ */
+static constexpr float4x4 translate(float3 t, const float4x4& m) noexcept
+{
+	return float4x4(m.c0, m.c1, m.c2, float4(getTranslation(m) + t, m.c3.w));
 }
 
 /***********************************************************************************************************************
- * @brief Applies scale transformation to an object in 3D space.
+ * @brief Creates a new matrix with a specified object scale in 3D space.
+ * @param s target object scale
+ */
+static constexpr float4x4 scale(float3 s) noexcept
+{
+	return float4x4(
+		s.x, 0.0f, 0.0f, 0.0f,
+		0.0f, s.y, 0.0f, 0.0f,
+		0.0f, 0.0f, s.z, 0.0f,
+		0.0f, 0.0f, 0.0f, 1.0f);
+}
+/**
+ * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)]
  *
  * @param[in] m target model matrix to scale
  * @param s target object scale
@@ -96,16 +99,15 @@ static constexpr float4x4 scale(const float4x4& m, float3 s) noexcept
 	return float4x4(m.c0 * s.x, m.c1 * s.y, m.c2 * s.z, m.c3);
 }
 /**
- * @brief Creates a new matrix with a specified object scale in 3D space.
+ * @brief Applies scale transformation to an object in 3D space. [r = scale(s) * m]
+ *
+ * @param[in] m target model matrix to scale
  * @param s target object scale
  */
-static constexpr float4x4 scale(float3 s) noexcept
+static constexpr float4x4 scale(float3 s, const float4x4& m) noexcept
 {
-	return float4x4(
-		s.x, 0.0f, 0.0f, 0.0f,
-		0.0f, s.y, 0.0f, 0.0f,
-		0.0f, 0.0f, s.z, 0.0f,
-		0.0f, 0.0f, 0.0f, 1.0f);
+	auto sm = scale(s);
+	return float4x4(sm * m.c0, sm * m.c1, sm * m.c2, sm * m.c3);
 }
 
 /**
@@ -144,11 +146,11 @@ static constexpr float4x4 rotate(quat q) noexcept
 /**
  * @brief Calculates a new rotation matrix from look vectors in 3D space.
  *
- * @param[in] lookFrom viewer position in 3D space
- * @param[in] lookTo object position in 3D space
- * @param[in] up space up direction vector
+ * @param lookFrom viewer position in 3D space
+ * @param lookTo object position in 3D space
+ * @param up space up direction vector
  */
-static float4x4 rotate(const float3& lookFrom, const float3& lookTo, const float3& up = float3::top) noexcept
+static float4x4 rotate(float3 lookFrom, float3 lookTo, float3 up = float3::top) noexcept
 {
 	auto f = normalize(lookTo - lookFrom);
 	auto s = normalize(cross(up, f));
@@ -163,10 +165,10 @@ static float4x4 rotate(const float3& lookFrom, const float3& lookTo, const float
 /**
  * @brief Calculates a new matrix from direction vector in 3D space.
  *
- * @param[in] front view direction vector in 3D space
- * @param[in] up space up direction vector
+ * @param front view direction vector in 3D space
+ * @param up space up direction vector
  */
-static float4x4 rotate(const float3& front, const float3& up = float3::top) noexcept
+static float4x4 rotate(float3 front, float3 up = float3::top) noexcept
 {
 	auto f = front;
 	auto s = normalize(cross(up, f));
@@ -191,12 +193,25 @@ static float4x4 extractRotation(const float4x4& m) noexcept
 	auto c2 = (float3)m.c2 * invScale.z;
 	return float4x4(float4(c0, 0.0f), float4(c1, 0.0f), float4(c2, 0.0f), float4(0.0f, 0.0f, 0.0f, 1.0f));
 }
+/**
+ * @brief Extracts total matrix rotation transformation of an object in 3D space.
+ * @warning Matrix should be only translated and/or rotated, without scaling!!!
+ * @param[in] m target model matrix to extract from
+ */
+static float4x4 extractRotationOnly(const float4x4& m) noexcept
+{
+	return float4x4(
+		float4((float3)m.c0, 0.0f), 
+		float4((float3)m.c1, 0.0f), 
+		float4((float3)m.c2, 0.0f), 
+		float4(0.0f, 0.0f, 0.0f, 1.0f));
+}
 
 /**
  * @brief Extracts total matrix rotation quaternion of an object in 3D space.
- * @param[in] m target rotation matrix to extract from
+ * @param m target rotation matrix to extract from
  */
-static quat extractQuat(const float3x3& m) noexcept
+static quat extractQuat(float3x3 m) noexcept
 {
 	auto fourXSquaredMinus1 = m.c0.x - m.c1.y - m.c2.z;
 	auto fourYSquaredMinus1 = m.c1.y - m.c0.x - m.c2.z;
@@ -249,10 +264,11 @@ static quat extractQuat(const float4x4& m) noexcept
  * @param[in] rotation object rotation in 3D space
  * @param[in] scale object scale in 3D space
  */
-static float4x4 calcModel(const float3& position = float3::zero,
-	const quat& rotation = quat::identity, const float3& scale = float3::one) noexcept
+static float4x4 calcModel(float3 position = float3::zero,
+	quat rotation = quat::identity, float3 scale = float3::one) noexcept
 {
-	return translate(position) * rotate(normalize(rotation)) * math::scale(scale);
+	return scale == float3::one ? translate(position, rotate(normalize(rotation))) :
+		translate(position) * rotate(normalize(rotation)) * math::scale(scale);
 }
 /**
  * @brief Extracts total matrix position, rotation and scale of an object in 3D space. (Decompose)
@@ -262,21 +278,33 @@ static float4x4 calcModel(const float3& position = float3::zero,
  * @param[out] scale object scale in 3D space
  * @param[out] rotation object rotation in 3D space
  */
-static void extractTransform(const float4x4& m, float3& position, float3& scale, quat& rotation) noexcept
+static void extractTransform(const float4x4& m, float3& position, quat& rotation, float3& scale) noexcept
 {
 	position = getTranslation(m);
-	scale = extractScale(m);
 	rotation = extractQuat(extractRotation(m));
+	scale = extractScale(m);
+}
+/**
+ * @brief Extracts total matrix position and rotation of an object in 3D space. (Decompose)
+ *
+ * @param[in] m target model matrix to extract from
+ * @param[out] position object position in 3D space
+ * @param[out] rotation object rotation in 3D space
+ */
+static void extractTransform(const float4x4& m, float3& position, quat& rotation) noexcept
+{
+	position = getTranslation(m);
+	rotation = extractQuat(extractRotationOnly(m));
 }
 
 /***********************************************************************************************************************
  * @brief Calculates a new model matrix from look vectors in 3D space.
  *
- * @param[in] from viewer position in 3D space
- * @param[in] to object position in 3D space
- * @param[in] up space up direction vector
+ * @param from viewer position in 3D space
+ * @param to object position in 3D space
+ * @param up space up direction vector
  */
-static float4x4 lookAt(const float3& from, const float3& to, const float3& up = float3::top) noexcept
+static float4x4 lookAt(float3 from, float3 to, float3 up = float3::top) noexcept
 {
 	auto f = normalize(to - from);
 	auto s = normalize(cross(up, f));
@@ -290,10 +318,10 @@ static float4x4 lookAt(const float3& from, const float3& to, const float3& up =
 /**
  * @brief Calculates a new quaternion from direction vector in 3D space.
  *
- * @param[in] direction viewer direction in 3D space
- * @param[in] up space up direction vector
+ * @param direction viewer direction in 3D space
+ * @param up space up direction vector
  */
-static quat lookAtQuat(const float3& direction, const float3& up = float3::top) noexcept
+static quat lookAtQuat(float3 direction, float3 up = float3::top) noexcept
 {
 	auto right = cross(up, direction);
 	auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot(right, right))));
@@ -302,4 +330,17 @@ static quat lookAtQuat(const float3& direction, const float3& up = float3::top)
 	return extractQuat(m);
 }
 
+/**
+ * @brief Calculates inverse of a matrix that contains only translation and/or rotation.
+ * @warning Matrix should be only translated and/or rotated, without scaling!!!
+ * @param[in] m target matrix to invert
+ */
+static float4x4 inverseTransRot(const float4x4& m) noexcept
+{
+	auto t = transpose(m);
+	t.c0.w = t.c1.w = t.c2.w = 0.0f; // Assumes transpose() is a full 4x4 one, which moves translation of m into the W row.
+	setTranslation(t, -((float3x3)t * getTranslation(m)));
+	return t;
+}
+
 } // namespace math
\ No newline at end of file
diff --git a/include/math/plane.hpp b/include/math/plane.hpp
index 42a0a3a..001e481 100644
--- a/include/math/plane.hpp
+++ b/include/math/plane.hpp
@@ -38,22 +38,18 @@ struct [[nodiscard]] Plane
 	 */
 	static constexpr uint8 frustumCount = 6;
 protected:
-	simd_f32_4 normal = simd_f32_4::zero;
+	simd_f32_4 normDist = simd_f32_4::zero;
 public:
-	/**
-	 * @brief Distance to the plane in 3D space.
-	 */
-	float distance = 0.0f;
 
 	/**
-	 * @brief Creates a new plane structure.
+	 * @brief Creates a new plane structure. (In 3D space)
 	 * 
-	 * @param normal target plane normal vector
+	 * @param normal target plane normal vector in 3D space
 	 * @param distance target distance to the plane
 	 * @param normalize is normal vector should be normalized
 	 */
 	Plane(simd_f32_4 normal, float distance = 0.0f, bool normalize = true) noexcept :
-		normal(normalize ? normalize3(normal) : normal), distance(distance) { }
+		normDist(normalize ? normalize3(normal) : normal) { normDist.setW(distance); }
 	/**
 	 * @brief Creates a new empty plane structure.
 	 */
@@ -65,34 +61,42 @@ struct [[nodiscard]] Plane
 	 */
 	Plane(const Triangle& triangle) noexcept
 	{
-		normal = cross3(triangle.p1 - triangle.p0, triangle.p2 - triangle.p0);
+		auto normal = cross3(triangle.p1 - triangle.p0, triangle.p2 - triangle.p0);
 		if (length3(normal) > 0.0f)
 			normal = normalize3(normal);
-		distance = dot3(normal, triangle.p0);
+		auto distance = dot3(normal, triangle.p0);
+		normDist = simd_f32_4(normal, distance);
 	}
 
 	/**
-	 * @brief Returns plane normal vector.
+	 * @brief Returns plane normal vector in 3D space.
 	 */
-	simd_f32_4 getNormal() const noexcept { return normal; }
+	const simd_f32_4& getNormal() const noexcept { return normDist; }
 	/**
-	 * @brief Sets plane normal vector.
+	 * @brief Sets plane normal vector in 3D space.
 	 * 
-	 * @param normal target plane normal vector
+	 * @param normal target plane normal vector in 3D space
 	 * @param normalize is normal vector should be normalized
 	 */
 	void setNormal(simd_f32_4 normal, bool normalize = true) noexcept
 	{
-		this->normal = normalize ? normalize3(normal) : normal;
+		normDist = simd_f32_4(normalize ? normalize3(normal) : normal, normDist.getW());
 	}
+
+	/**
+	 * @brief Returns distance to the plane.
+	 */
+	float getDistance() const noexcept { return normDist.getW(); }
+	/**
+	 * @brief Sets distance to the plane.
+	 * @param distance target distance to the plane
+	 */
+	void setDistance(float distance) noexcept { return normDist.setW(distance); }
+
 	/**
 	 * @brief Normalizes plane normal vector.
 	 */
-	void normalize() noexcept
-	{
-		auto l = length3(normal);
-		normal /= l; distance /= l;
-	}
+	void normalize() noexcept { normDist /= length3(normDist); }
 
 	static const Plane left, right, bottom, top, back, front;
 };
@@ -112,10 +116,10 @@ inline const Plane Plane::front = Plane(simd_f32_4(0.0f, 0.0f, 1.0f, 0.0f), 0.0f
  */
 static float distance3(const Plane& plane, simd_f32_4 point) noexcept
 {
-	return dot3(plane.getNormal(), point) + plane.distance;
+	return dot3(plane.getNormal(), point) + plane.getDistance();
 }
 /**
- * @brief Returns closest point on the plane to specified one.
+ * @brief Returns closest point on the plane to specified one in 3D space.
  *
  * @param[in] plane target plane to use
  * @param[in] point target point in 3D space
@@ -126,7 +130,7 @@ static simd_f32_4 closestPoint(const Plane& plane, simd_f32_4 point) noexcept
 }
 
 /**
- * @brief Returns closest point on the triangle to specified one.
+ * @brief Returns closest point on the triangle to specified one in 3D space.
  *
  * @param[in] plane target plane to use
  * @param[in] point target point in 3D space
@@ -137,14 +141,14 @@ static simd_f32_4 closestPoint(const Triangle& triangle, simd_f32_4 point) noexc
 	if (isInside(triangle, p))
 		return p;
 
-	auto ab = Line(triangle.p0, triangle.p1),bc = Line(triangle.p1, triangle.p2), ca = Line(triangle.p2, triangle.p0);
+	auto ab = Line(triangle.p0, triangle.p1), bc = Line(triangle.p1, triangle.p2), ca = Line(triangle.p2, triangle.p0);
 	auto c0 = closestPoint(ab, p), c1 = closestPoint(bc, p), c2 = closestPoint(ca, p);
-	auto m0 = lengthSq3(p - c0), m1 = lengthSq3(p - c1), m2 = lengthSq3(p - c2);
-	auto m = min(m0, m1, m2);
+	auto v = simd_f32_4(lengthSq3(p - c0), lengthSq3(p - c1), lengthSq3(p - c2));
+	auto m = min3(v);
 
-	if (m == m0)
+	if (m == v.getX())
 		return c0;
-	if (m == m1)
+	if (m == v.getY())
 		return c1;
 	return c2;
 }
@@ -158,16 +162,16 @@ static simd_f32_4 closestPoint(const Triangle& triangle, simd_f32_4 point) noexc
  */
 static void extractFrustumPlanes(const simd_f32_4x4& frustum, Plane* planes) noexcept
 {
-	auto t = transpose(frustum);
+	auto t = transpose4x4(frustum);
+	auto c0W = t.c0.getW(), c1W = t.c1.getW(), c2W = t.c2.getW(), c3W = t.c3.getW();
 	
 	// Gribb & Hartmann method
-	planes[0] = Plane(simd_f32_4(frustum.columns[0].getW() + frustum.columns[0].getX(), frustum.columns[1].getW() + frustum.columns[1].getX(), frustum.columns[2].getW() + frustum.columns[2].getX(), 0.0f), frustum.columns[3].getW() + frustum.columns[3].getX(), false);
-	planes[1] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getX(), frustum.columns[1].getW() - frustum.columns[1].getX(), frustum.columns[2].getW() - frustum.columns[2].getX(), 0.0f), frustum.columns[3].getW() - frustum.columns[3].getX(), false);
-	planes[2] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getY(), frustum.columns[1].getW() - frustum.columns[1].getY(), frustum.columns[2].getW() - frustum.columns[2].getY(), 0.0f), frustum.columns[3].getW() - frustum.columns[3].getY(), false); // Flipped Vulkan NDC!
-	planes[3] = Plane(simd_f32_4(frustum.columns[0].getW() + frustum.columns[0].getY(), frustum.columns[1].getW() + frustum.columns[1].getY(), frustum.columns[2].getW() + frustum.columns[2].getY(), 0.0f), frustum.columns[3].getW() + frustum.columns[3].getY(), false); // Flipped Vulkan NDC!
-	planes[4] = Plane(simd_f32_4(frustum.columns[0].getZ()                            , frustum.columns[1].getZ()                            , frustum.columns[2].getZ()                            , 0.0f), frustum.columns[3].getZ()                  , false);
-	planes[5] = Plane(simd_f32_4(frustum.columns[0].getW() - frustum.columns[0].getZ(), frustum.columns[1].getW() - frustum.columns[1].getZ(), frustum.columns[2].getW() - frustum.columns[2].getZ(), 0.0f), frustum.columns[3].getW() - frustum.columns[3].getZ(), false);
-	// TODO: maybe we can optimize this with swizzle or other
+	planes[0] = Plane(simd_f32_4(t.c3 + t.c0, 0.0f), c3W + c0W, false);
+	planes[1] = Plane(simd_f32_4(t.c3 - t.c0, 0.0f), c3W - c0W, false);
+	planes[2] = Plane(simd_f32_4(t.c3 - t.c1, 0.0f), c3W - c1W, false); // Flipped Vulkan NDC!
+	planes[3] = Plane(simd_f32_4(t.c3 + t.c1, 0.0f), c3W + c1W, false); // Flipped Vulkan NDC!
+	planes[4] = Plane(simd_f32_4(t.c2,        0.0f),       c2W, false);
+	planes[5] = Plane(simd_f32_4(t.c3 - t.c2, 0.0f), c3W - c2W, false);
 }
 /**
  * @brief Normalizes specified planes.
diff --git a/include/math/quaternion.hpp b/include/math/quaternion.hpp
index 9f0b054..81ba543 100644
--- a/include/math/quaternion.hpp
+++ b/include/math/quaternion.hpp
@@ -14,7 +14,7 @@
 
 /***********************************************************************************************************************
  * @file
- * @brief Common quaternion functions.
+ * @brief Common floating point quaternion functions.
  * 
  * @details
  * Quaternion is a mathematical concept used to represent 3D rotations. It consists of four components (one real part 
@@ -27,7 +27,7 @@
  */
 
 #pragma once
-#include "math/vector/float.hpp"
+#include "math/simd/quaternion.hpp"
 
 namespace math
 {
@@ -40,30 +40,31 @@ struct [[nodiscard]] quat : public float4
 {
 	/**
 	 * @brief Creates a new quaternion structure.
-	 * 
-	 * @param x imaginary vector X part.
-	 * @param y imaginary vector Y part.
-	 * @param z imaginary vector Z part.
-	 * @param w real scalar part.
+	 * @note Identity quaternion represents rotation of 0 degrees around all axes.
+	 * @param xyzw quaternion parts vector
 	 */
-	constexpr quat(float x, float y, float z, float w) noexcept : float4(x, y, z, w) { }
+	constexpr quat(float4 xyzw = identity) noexcept : float4(xyzw) { }
 	/**
-	 * @brief Creates a new identity quaternion structure.
-	 * @param[in] xyzw quaternion parts vector.
+	 * @brief Creates a new quaternion structure.
+	 * 
+	 * @param x imaginary vector X part
+	 * @param y imaginary vector Y part
+	 * @param z imaginary vector Z part
+	 * @param w real scalar part
 	 */
-	constexpr quat(const float4& xyzw = quat::identity) noexcept : float4(xyzw) { }
+	constexpr quat(float x, float y, float z, float w) noexcept : float4(x, y, z, w) { }
 	/**
 	 * @brief Creates a new quaternion structure from the euler angles. (In radians)
 	 * @param[in] eulerAngles target euler angles vector
 	 */
-	quat(const float3& eulerAngles) noexcept
+	quat(float3 eulerAngles) noexcept
 	{
-		auto sin = math::sin(eulerAngles * 0.5f);
-		auto cos = math::cos(eulerAngles * 0.5f);
-		x = sin.x * cos.y * cos.z - cos.x * sin.y * sin.z;
-		y = cos.x * sin.y * cos.z + sin.x * cos.y * sin.z;
-		z = cos.x * cos.y * sin.z - sin.x * sin.y * cos.z;
-		w = cos.x * cos.y * cos.z + sin.x * sin.y * sin.z;
+		auto s = sin(eulerAngles * 0.5f);
+		auto c = cos(eulerAngles * 0.5f);
+		x = s.x * c.y * c.z - c.x * s.y * s.z;
+		y = c.x * s.y * c.z + s.x * c.y * s.z;
+		z = c.x * c.y * s.z - s.x * s.y * c.z;
+		w = c.x * c.y * c.z + s.x * s.y * s.z;
 	}
 	/**
 	 * @brief Creates a new quaternion structure from the angle and axis. (In radians)
@@ -73,10 +74,10 @@ struct [[nodiscard]] quat : public float4
 	 */
 	quat(float angle, float3 axis) noexcept
 	{
-		auto sinus = std::sin(angle * 0.5f);
-		x = axis.x * sinus;
-		y = axis.y * sinus;
-		z = axis.z * sinus;
+		auto s = std::sin(angle * 0.5f);
+		x = axis.x * s;
+		y = axis.y * s;
+		z = axis.z * s;
 		w = std::cos(angle * 0.5f);
 	}
 
@@ -94,50 +95,45 @@ struct [[nodiscard]] quat : public float4
 	}
 	/**
 	 * @brief Rotates vector by this quaternion.
-	 * @param[in] v target vector to rotate
+	 * @param v target vector to rotate
 	 */
-	constexpr float3 operator*(const float3& v) const noexcept
+	float3 operator*(float3 v) const noexcept
 	{
-		auto q = float3(x, y, z);
-		auto cq = math::cross(q, v);
-		auto ccq = math::cross(q, cq);
-		return v + ((cq * w) + ccq) * 2.0f;
+		auto q = (float3)*this;
+		auto cq = math::cross(q, v), ccq = math::cross(q, cq);
+		return fma(fma(cq, float3(w), ccq), float3(2.0f), v);
 	}
 	/**
 	 * @brief Rotates this quaternion by the specified one.
-	 * @param[in] q target quaternion to rotate by
+	 * @param q target quaternion to rotate by
 	 */
-	quat& operator*=(const quat& q) noexcept { return *this = *this * q; }
+	quat& operator*=(quat q) noexcept { return *this = *this * q; }
 
 	/**
 	 * @brief Extracts quaternion rotation around X axis. (In radians)
 	 */
-	float getPitch() const noexcept
+	float extractPitch() const noexcept
 	{
 		auto yy = 2.0f * (y * z + w * x);
 		auto xx = w * w - x * x - y * y + z * z;
 
 		// Avoid atan2(0, 0) - handle singularity - Matiis
-		if (std::abs(xx) <= numeric_limits<float>::epsilon() ||
-			std::abs(yy) <= numeric_limits<float>::epsilon())
-		{
+		if (std::abs(xx) <= FLT_EPSILON || std::abs(yy) <= FLT_EPSILON)
 			return 2.0f * std::atan2(x, w);
-		}
-
 		return std::atan2(yy, xx);
 	}
 	/**
 	 * @brief Extracts quaternion around Y axis. (In radians)
 	 */
-	float getYaw() const noexcept { return std::asin(std::clamp(-2.0f * (x * z - w * y), -1.0f, 1.0f)); }
+	float extractYaw() const noexcept { return std::asin(std::clamp(-2.0f * (x * z - w * y), -1.0f, 1.0f)); }
 	/**
 	 * @brief Extracts quaternion around Z axis. (In radians)
 	 */
-	float getRoll() const noexcept { return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z); }
+	float extractRoll() const noexcept { return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z); }
 	/**
 	 * @brief Calculates quaternion euler angles. (In radians)
 	 */
-	float3 toEulerAngles() const noexcept { return float3(getPitch(), getYaw(), getRoll()); }
+	float3 extractEulerAngles() const noexcept { return float3(extractPitch(), extractYaw(), extractRoll()); }
 
 	/**
 	 * @brief Quaternion with zero rotation.
@@ -151,40 +147,32 @@ inline const quat quat::identity = quat(0.0f, 0.0f, 0.0f, 1.0f);
  * @brief Normalizes quaternion.
  * @param q target quaternion to normalize
  */
-static quat normalize(quat q) noexcept
-{
-	auto l = length(q);
-	assert(l > 0.0f);
-	auto il = (1.0f / l);
-	return quat(q.x * il, q.y * il, q.z * il, q.w * il);
-}
+static quat normalize(quat q) noexcept { return (float4)q * (1.0f / length(q)); }
+
 /**
  * @brief Quaternion spherical linear interpolation from a to b.
  * 
  * @param a starting quaternion value
- * @param[in] b target quaternion value
+ * @param b target quaternion value
  * @param t interpolation value (from 0.0 to 1.0)
  */
-static quat slerp(quat a, const quat& b, float t) noexcept
+static quat slerp(quat a, quat b, float t) noexcept
 {
-	auto cosTheta = dot(a, b);
-	auto c = (float4)b;
-
+	auto cosTheta = dot(a, b); auto c = (float4)b;
 	if (cosTheta < 0.0f)
 	{
 		c = -b;
 		cosTheta = -cosTheta;
 	}
 
-	if (cosTheta > 1.0f - numeric_limits<float>::epsilon())
+	if (cosTheta > 1.0f - FLT_EPSILON)
 	{
-		return quat(lerp(a.x, c.x, t), lerp(a.y, c.y, t), lerp(a.z, c.z, t), lerp(a.w, c.w, t));
+		return lerp(a, c, t);
 	}
 	else
 	{
 		auto angle = std::acos(cosTheta);
-		return ((float4)a * std::sin((1.0f - t) * angle) + 
-			c * std::sin(t * angle)) / std::sin(angle);
+		return ((float4)a * std::sin((1.0f - t) * angle) + c * std::sin(t * angle)) / std::sin(angle);
 	}
 }
 
@@ -195,16 +183,16 @@ static quat slerp(quat a, const quat& b, float t) noexcept
 static constexpr quat conjugate(quat q) noexcept { return quat(-q.x, -q.y, -q.z, q.w); }
 /**
  * @brief Calculates inverse of the quaternion.
- * @param[in] q target quaternion to inverse
+ * @param q target quaternion to inverse
  */
-static constexpr quat inverse(const quat& q) noexcept { return conjugate(q) / dot(q, q);  }
+static constexpr quat inverse(quat q) noexcept { return conjugate(q) / dot(q, q);  }
 
 /**
  * @brief Rotates vector by the inversed quaternion.
  * 
- * @param[in] v target vector to rotate
- * @param[in] q target quaternion to use
+ * @param v target vector to rotate
+ * @param q target quaternion to use
  */
-static constexpr float3 operator*(const float3& v, const quat& q) noexcept { return inverse(q) * v; }
+static float3 operator*(float3 v, quat q) noexcept { return inverse(q) * v; }
 
 } // namespace math
\ No newline at end of file
diff --git a/include/math/ray.hpp b/include/math/ray.hpp
index c0653f5..401e74f 100644
--- a/include/math/ray.hpp
+++ b/include/math/ray.hpp
@@ -32,6 +32,7 @@ struct [[nodiscard]] Ray
 {
 protected:
 	simd_f32_4 direction = simd_f32_4::zero;
+	simd_u32_4 isParallel = simd_u32_4::zero;
 public:
 	/**
 	 * @brief Ray origin point in 3D space.
@@ -39,10 +40,10 @@ struct [[nodiscard]] Ray
 	simd_f32_4 origin = simd_f32_4::zero;
 
 	/**
-	 * @brief Creates a new ray structure.
+	 * @brief Creates a new ray structure. (In 3D space)
 	 * 
-	 * @param[in] origin ray origin point in 3D space
-	 * @param[in] direction ray direction vector in 3D space
+	 * @param origin ray origin point in 3D space
+	 * @param direction ray direction vector in 3D space
 	 * @param normalize is direction vector should be normalized
 	 */
 	Ray(simd_f32_4 origin, simd_f32_4 direction, bool normalize = true) noexcept : 
@@ -55,18 +56,24 @@ struct [[nodiscard]] Ray
 	/**
 	 * @brief Returns ray direction vector in 3D space.
 	 */
-	simd_f32_4 getDirection() const noexcept { return direction; }
+	const simd_f32_4& getDirection() const noexcept { return direction; }
 	/**
 	 * @brief Sets ray direction vector in 3D space.
 	 * 
-	 * @param[in] direction target ray direction vector
+	 * @param direction target ray direction vector
 	 * @param normalize is direction vector should be normalized
 	 */
 	void setDirection(simd_f32_4 direction, bool normalize = true) noexcept
 	{
 		this->direction = normalize ? normalize3(direction) : direction;
+		isParallel = abs(direction) <= simd_f32_4(1.0e-20f);
 	}
 
+	/**
+	 * @brief Returns mask of the ray direction axes that are (almost) zero, i.e. ray is parallel to the planes of those axes.
+	 */
+	const simd_u32_4& getParallel() const noexcept { return isParallel; }
+
 	/**
 	 * @brief Normalizes ray direction vector.
 	 */
@@ -92,7 +99,7 @@ inline const Ray Ray::back = Ray(simd_f32_4::zero, simd_f32_4(0.0f, 0.0f, -1.0f,
 inline const Ray Ray::front = Ray(simd_f32_4::zero, simd_f32_4(0.0f, 0.0f, 1.0f, 0.0f), false);
 
 /**
- * @brief Returns closest point on ray to the specified one.
+ * @brief Returns closest point on ray to the specified one in 3D space.
  *
  * @param[in] ray target ray to use
  * @param point target point in 3D space
diff --git a/include/math/sh.hpp b/include/math/sh.hpp
index c6234c9..d6390fa 100644
--- a/include/math/sh.hpp
+++ b/include/math/sh.hpp
@@ -32,6 +32,8 @@
 #include "math/ibl.hpp"
 #include "math/matrix.hpp"
 
+// TODO: use SIMD where possible, but check if SH results are identical.
+
 namespace math::sh
 {
 
diff --git a/include/math/simd/matrix/float.hpp b/include/math/simd/matrix/float.hpp
index c28f670..fe914b8 100644
--- a/include/math/simd/matrix/float.hpp
+++ b/include/math/simd/matrix/float.hpp
@@ -19,18 +19,19 @@
 
 #pragma once
 #include "math/matrix/float.hpp"
-#include "math/simd/vector/float.hpp"
+#include "math/simd/quaternion.hpp"
 
 namespace math
 {
 
 /**
  * @brief SIMD 32bit floating point 4x4 matrix structure.
+ * @details Commonly used for basic transformations: translation, scale, rotation, etc.
  * @note Use it when you know how to implement a faster vectorized code.
  */
 struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 {
-	simd_f32_4 columns[4];
+	simd_f32_4 c0, c1, c2, c3;
 
 	/**
 	 * @brief Creates a new zero initialized SIMD 32bit floating point 4x4 matrix structure.
@@ -45,24 +46,35 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 	 * @param c3 fourth matrix column value
 	 */
 	simd_f32_4x4(simd_f32_4 c0, simd_f32_4 c1, simd_f32_4 c2, simd_f32_4 c3) noexcept :
-		columns { c0, c1, c2, c3 } { };
+		c0(c0), c1(c1), c2(c2), c3(c3) { }
 	/**
 	 * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure.
-	 * @param xyzw target value for all matrix vector components
+	 * @warning This constructor duplicates third column vector to the fourth column!
+	 *
+	 * @param c0 first matrix column value
+	 * @param c1 second matrix column value
+	 * @param c2 third matrix column value
+	 */
+	simd_f32_4x4(simd_f32_4 c0, simd_f32_4 c1, simd_f32_4 c2) noexcept :
+		c0(c0), c1(c1), c2(c2), c3(c2) { }
+	/**
+	 * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure.
+	 * @param n target value for all matrix vector components
 	 */
-	explicit simd_f32_4x4(float v) noexcept :
-		columns { simd_f32_4(v), simd_f32_4(v), simd_f32_4(v), simd_f32_4(v) } { };
+	explicit simd_f32_4x4(float n) noexcept : c0(n), c1(n), c2(n), c3(n) { }
 	/**
 	 * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure.
-	 * @param v target 4x4 matrix value
+	 * @param[in] m target 4x4 matrix value
 	 */
-	explicit simd_f32_4x4(const float4x4& v) noexcept { *this = *((const simd_f32_4x4*)&v); }
+	explicit simd_f32_4x4(const float4x4& m) noexcept : c0(m.c0), c1(m.c1), c2(m.c2), c3(m.c3) { }
 	/**
 	 * @brief Creates a new SIMD 32bit floating point 4x4 matrix structure.
-	 * @param v target 3x3 matrix value
+	 * 
+	 * @param[in] m target 3x3 matrix value
+	 * @param c3 fourth matrix column SIMD vector
 	 */
-	explicit simd_f32_4x4(const float3x3& v, simd_f32_4 c3 = simd_f32_4::zero) noexcept : columns 
-		{ (simd_f32_4)float4(v.c0, v.c0.z), (simd_f32_4)float4(v.c1, v.c1.z), (simd_f32_4)float4(v.c2, v.c2.z), c3 } { }
+	explicit simd_f32_4x4(const float3x3& m, simd_f32_4 c3 = simd_f32_4::zero) noexcept : 
+		c0(float4(m.c0, m.c0.z)), c1(float4(m.c1, m.c1.z)), c2(float4(m.c2, m.c2.z)), c3(c3) { }
 
 	/**
 	 * @brief Returns SIMD matrix column by index.
@@ -71,7 +83,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 	simd_f32_4& operator[](psize i) noexcept
 	{
 		assert(i <= 3);
-		return columns[i];
+		return ((simd_f32_4*)this)[i];
 	}
 	/**
 	 * @brief Returns SIMD matrix column by index.
@@ -80,49 +92,75 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 	simd_f32_4 operator[](psize i) const noexcept
 	{
 		assert(i <= 3);
-		return columns[i];
+		return ((const simd_f32_4*)this)[i];
+	}
+
+	/**
+	 * @brief Returns SIMD matrix as 4x4 floating point matrix.
+	 */
+	explicit operator float4x4() const noexcept
+	{
+		return *((const float4x4*)this);
 	}
+	/**
+	 * @brief Returns SIMD matrix as 4x3 floating point matrix.
+	 */
+	explicit operator float4x3() const noexcept
+	{
+		return float4x3((float3)c0, (float3)c1, (float3)c2, (float3)c3);
+	}
+	/**
+	 * @brief Returns SIMD matrix as 3x3 floating point matrix.
+	 */
+	explicit operator float3x3() const noexcept
+	{
+		return float3x3((float3)c0, (float3)c1, (float3)c2);
+	}
+	/**
+	 * @brief Returns SIMD matrix as 2x2 floating point matrix.
+	 */
+	explicit operator float2x2() const noexcept { return float2x2((float2)c0, (float2)c1); }
 
 	/*******************************************************************************************************************
 	 * @brief Adds specified value to the all SIMD matrix columns.
-	 * @param v target multiplier value
+	 * @param n target value to add
 	 */
-	simd_f32_4x4 operator+(float v) const noexcept
+	simd_f32_4x4 operator+(float n) const noexcept
 	{
-		auto t = simd_f32_4(v);
-		return simd_f32_4x4(columns[0] + t, columns[1] + t, columns[2] + t, columns[3] + t);
+		auto t = simd_f32_4(n);
+		return simd_f32_4x4(c0 + t, c1 + t, c2 + t, c3 + t);
 	}
 	/**
 	 * @brief Subdivides specified value from the all SIMD matrix columns.
-	 * @param v target multiplier value
+	 * @param n target value to subtract
 	 */
-	simd_f32_4x4 operator-(float v) const noexcept
+	simd_f32_4x4 operator-(float n) const noexcept
 	{
-		auto t = simd_f32_4(v);
-		return simd_f32_4x4(columns[0] - t, columns[1] - t, columns[2] - t, columns[3] - t);
+		auto t = simd_f32_4(n);
+		return simd_f32_4x4(c0 - t, c1 - t, c2 - t, c3 - t);
 	}
 	/**
 	 * @brief Multiplies all SIMD matrix columns by the specified value.
-	 * @param v target multiplier value
+	 * @param n target value to multiply by
 	 */
-	simd_f32_4x4 operator*(float v) const noexcept
+	simd_f32_4x4 operator*(float n) const noexcept
 	{
-		auto t = simd_f32_4(v);
-		return simd_f32_4x4(columns[0] * t, columns[1] * t, columns[2] * t, columns[3] * t);
+		auto t = simd_f32_4(n);
+		return simd_f32_4x4(c0 * t, c1 * t, c2 * t, c3 * t);
 	}
 	/**
 	 * @brief Divides all SIMD matrix columns by the specified value.
-	 * @param v target multiplier value
+	 * @param n target value to divide by
 	 */
-	simd_f32_4x4 operator/(float v) const noexcept
+	simd_f32_4x4 operator/(float n) const noexcept
 	{
-		auto t = simd_f32_4(v);
-		return simd_f32_4x4(columns[0] / t, columns[1] / t, columns[2] / t, columns[3] / t);
+		auto t = simd_f32_4(n);
+		return simd_f32_4x4(c0 / t, c1 / t, c2 / t, c3 / t);
 	}
 
 	/**
 	 * @brief Calculates dot product between two SIMD matrices.
-	 * @param m target SIMD matrix to dot by
+	 * @param[in] m target SIMD matrix to dot by
 	 */
 	simd_f32_4x4 operator*(const simd_f32_4x4& m) const noexcept
 	{
@@ -130,25 +168,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 		#if defined(MATH_SIMD_SUPPORT_SSE)
 		for (int i = 0; i < 4; ++i)
 		{
-			auto c = m.columns[i].data;
-			auto r = _mm_mul_ps(columns[0].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)));
-			r = _mm_add_ps(r, _mm_mul_ps(columns[1].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1))));
-			r = _mm_add_ps(r, _mm_mul_ps(columns[2].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2))));
-			r = _mm_add_ps(r, _mm_mul_ps(columns[3].data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3))));
-			result.columns[i].data = r;
+			auto c = m[i].data;
+			auto r = _mm_mul_ps(c0.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)));
+			r = _mm_add_ps(r, _mm_mul_ps(c1.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1))));
+			r = _mm_add_ps(r, _mm_mul_ps(c2.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2))));
+			r = _mm_add_ps(r, _mm_mul_ps(c3.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3))));
+			result[i].data = r;
 		}
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		for (int i = 0; i < 4; ++i)
 		{
-			auto c = m.columns[i].data;
-			auto r = vmulq_f32(columns[0].data, vdupq_laneq_f32(c, 0));
-			r = vmlaq_f32(r, columns[1].data, vdupq_laneq_f32(c, 1));
-			r = vmlaq_f32(r, columns[2].data, vdupq_laneq_f32(c, 2));
-			r = vmlaq_f32(r, columns[3].data, vdupq_laneq_f32(c, 3));
-			result.columns[i].data = r;
+			auto c = m[i].data;
+			auto r = vmulq_f32(c0.data, vdupq_laneq_f32(c, 0));
+			r = vmlaq_f32(r, c1.data, vdupq_laneq_f32(c, 1));
+			r = vmlaq_f32(r, c2.data, vdupq_laneq_f32(c, 2));
+			r = vmlaq_f32(r, c3.data, vdupq_laneq_f32(c, 3));
+			result[i].data = r;
 		}
 		#else
-		result = simd_f32_4x4((*((const float4x4*)this) * *((const float4x4*)&m)));
+		result = simd_f32_4x4((*((const float4x4*)this) * (*((const float4x4*)&m))));
 		#endif
 		return result;
 	}
@@ -159,19 +197,19 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 	simd_f32_4 operator*(simd_f32_4 v) const noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		auto r = _mm_mul_ps(columns[0].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)));
-		r = _mm_add_ps(r, _mm_mul_ps(columns[1].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1))));
-		r = _mm_add_ps(r, _mm_mul_ps(columns[2].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2))));
-		r = _mm_add_ps(r, _mm_mul_ps(columns[3].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3))));
+		auto r = _mm_mul_ps(c0.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)));
+		r = _mm_add_ps(r, _mm_mul_ps(c1.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1))));
+		r = _mm_add_ps(r, _mm_mul_ps(c2.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2))));
+		r = _mm_add_ps(r, _mm_mul_ps(c3.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3))));
 		return r;
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		auto r = vmulq_f32(mCcolumnsol[0].data, vdupq_laneq_f32(v.data, 0));
-		r = vmlaq_f32(r, columns[1].data, vdupq_laneq_f32(v.data, 1));
-		r = vmlaq_f32(r, columns[2].data, vdupq_laneq_f32(v.data, 2));
-		r = vmlaq_f32(r, columns[3].data, vdupq_laneq_f32(v.data, 3));
+		auto r = vmulq_f32(c0.data, vdupq_laneq_f32(v.data, 0));
+		r = vmlaq_f32(r, c1.data, vdupq_laneq_f32(v.data, 1));
+		r = vmlaq_f32(r, c2.data, vdupq_laneq_f32(v.data, 2));
+		r = vmlaq_f32(r, c3.data, vdupq_laneq_f32(v.data, 3));
 		return r;
 		#else
-		return simd_f32_4((*((const float4x4*)this) * *((const float4*)&v)));
+		return simd_f32_4((*((const float4x4*)this) * (*((const float4*)&v))));
 		#endif
 	}
 
@@ -183,12 +221,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4x4
 	
 	bool operator==(const simd_f32_4x4& m) const noexcept
 	{
-		return areAllTrue(compare(columns[0], m.columns[0]) & compare(columns[1], m.columns[1]) &
-			compare(columns[2], m.columns[2]) & compare(columns[3], m.columns[3]));
+		return areAllTrue(equal(c0, m.c0) & equal(c1, m.c1) & equal(c2, m.c2) & equal(c3, m.c3));
+	}
+	bool operator!=(const simd_f32_4x4& m) const noexcept
+	{
+		return areAnyTrue(notEqual(c0, m.c0) | notEqual(c1, m.c1) | notEqual(c2, m.c2) | notEqual(c3, m.c3));
 	}
-	bool operator!=(const simd_f32_4x4& m) const noexcept { return !(*this == m); }
 
-	static const simd_f32_4x4 zero, one, minusOne, inf, minusInf, nan, identity;
+	static const simd_f32_4x4 zero, one, minusOne, inf, minusInf, nan, min, max, epsilon, identity;
 };
 
 //**********************************************************************************************************************
@@ -198,6 +238,9 @@ inline const simd_f32_4x4 simd_f32_4x4::minusOne = simd_f32_4x4(-1.0f);
 inline const simd_f32_4x4 simd_f32_4x4::inf = simd_f32_4x4(INFINITY);
 inline const simd_f32_4x4 simd_f32_4x4::minusInf = simd_f32_4x4(-INFINITY);
 inline const simd_f32_4x4 simd_f32_4x4::nan = simd_f32_4x4(NAN);
+inline const simd_f32_4x4 simd_f32_4x4::min = simd_f32_4x4(FLT_MIN);
+inline const simd_f32_4x4 simd_f32_4x4::max = simd_f32_4x4(FLT_MAX);
+inline const simd_f32_4x4 simd_f32_4x4::epsilon = simd_f32_4x4(FLT_EPSILON);
 inline const simd_f32_4x4 simd_f32_4x4::identity = simd_f32_4x4(
 	simd_f32_4(1.0f, 0.0f, 0.0f, 0.0f),
 	simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f),
@@ -208,26 +251,89 @@ inline const simd_f32_4x4 simd_f32_4x4::identity = simd_f32_4x4(
 /**
  * @brief Returns true if first SIMD matrix binary representation is less than the second.
  *
- * @param a first SIMD matrix to binary compare
- * @param b second SIMD matrix to binary compare
+ * @param[in] a first SIMD matrix to binary compare
+ * @param[in] b second SIMD matrix to binary compare
  */
-static bool isBinaryLess(simd_f32_4x4 a, simd_f32_4x4 b) noexcept { return memcmp(&a, &b, sizeof(simd_f32_4x4)) < 0; }
+static bool isBinaryLess(const simd_f32_4x4& a, const simd_f32_4x4& b) noexcept
+{
+	return memcmp(&a, &b, sizeof(simd_f32_4x4)) < 0;
+}
+
+/**
+ * @brief Calculates 3x3 dot product between two SIMD matrices. (Only c0, c1, c2 columns of the a matrix are used)
+ * @param[in] a first SIMD matrix to use
+ * @param[in] b second SIMD matrix to use
+ */
+static simd_f32_4x4 multiply3x3(const simd_f32_4x4& a, const simd_f32_4x4& b) noexcept
+{
+	simd_f32_4x4 result;
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	for (int i = 0; i < 4; ++i)
+	{
+		auto c = b[i].data;
+		auto r = _mm_mul_ps(a.c0.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)));
+		r = _mm_add_ps(r, _mm_mul_ps(a.c1.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1))));
+		r = _mm_add_ps(r, _mm_mul_ps(a.c2.data, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2))));
+		result[i].data = r;
+	}
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	for (int i = 0; i < 4; ++i)
+	{
+		auto c = b[i].data;
+		auto r = vmulq_f32(a.c0.data, vdupq_laneq_f32(c, 0));
+		r = vmlaq_f32(r, a.c1.data, vdupq_laneq_f32(c, 1));
+		r = vmlaq_f32(r, a.c2.data, vdupq_laneq_f32(c, 2));
+		result[i].data = r;
+	}
+	#else
+	result = simd_f32_4x4((float3x3)a * (float3x3)b, a.c3); // NOTE(review): SIMD paths also transform b.c3, this one appears to copy a.c3. Confirm intent.
+	#endif
+	return result;
+}
+/**
+ * @brief Calculates 3x3 dot product between SIMD matrix and vector. (Only c0, c1, c2 matrix columns are used)
+ * @param[in] m target SIMD matrix to use
+ * @param v target SIMD vector to dot by
+ */
+static simd_f32_4 multiply3x3(const simd_f32_4x4& m, simd_f32_4 v) noexcept
+{
+	simd_f32_4 result;
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	auto r = _mm_mul_ps(m.c0.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)));
+	r = _mm_add_ps(r, _mm_mul_ps(m.c1.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1))));
+	r = _mm_add_ps(r, _mm_mul_ps(m.c2.data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2))));
+	result = r;
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	auto r = vmulq_f32(m.c0.data, vdupq_laneq_f32(v.data, 0));
+	r = vmlaq_f32(r, m.c1.data, vdupq_laneq_f32(v.data, 1));
+	r = vmlaq_f32(r, m.c2.data, vdupq_laneq_f32(v.data, 2));
+	result = r;
+	#else
+	auto t = float3x3((float3)m.c0, (float3)m.c1, (float3)m.c2) * (float3)v;
+	result = simd_f32_4(t.x, t.y, t.z, v.floats.w);
+	#endif
+	return result.swizzle<SwX, SwY, SwZ, SwZ>(); // W lane of the returned vector is a copy of Z on every path.
+}
 
-static simd_f32_4x4 transpose(const simd_f32_4x4& m)
+/**
+ * @brief Calculates 4x4 SIMD matrix transpose. (Flips matrix over its diagonal)
+ * @param[in] m target SIMD matrix to transpose
+ */
+static simd_f32_4x4 transpose4x4(const simd_f32_4x4& m) noexcept
 {
-	#if defined(JPH_USE_SSE)
-	auto t1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
-	auto t3 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
-	auto t2 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(1, 0, 1, 0));
-	auto t4 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(3, 2, 3, 2));
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0));
+	auto t3 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2));
+	auto t2 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(1, 0, 1, 0));
+	auto t4 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(3, 2, 3, 2));
 	return simd_f32_4x4(
 		_mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)),
 		_mm_shuffle_ps(t1, t2, _MM_SHUFFLE(3, 1, 3, 1)),
 		_mm_shuffle_ps(t3, t4, _MM_SHUFFLE(2, 0, 2, 0)),
 		_mm_shuffle_ps(t3, t4, _MM_SHUFFLE(3, 1, 3, 1)));
-	#elif defined(JPH_USE_NEON)
-	auto t1 = vzipq_f32(mCol[0].mValue, mCol[2].mValue);
-	auto t2 = vzipq_f32(mCol[1].mValue, mCol[3].mValue);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	auto t1 = vzipq_f32(m.c0.data, m.c2.data);
+	auto t2 = vzipq_f32(m.c1.data, m.c3.data);
 	auto t3 = vzipq_f32(t1.val[0], t2.val[0]);
 	auto t4 = vzipq_f32(t1.val[1], t2.val[1]);
 	return simd_f32_4x4(t3.val[0], t3.val[1], t4.val[0], t4.val[1]);
@@ -235,30 +341,187 @@ static simd_f32_4x4 transpose(const simd_f32_4x4& m)
 	return simd_f32_4x4(transpose(*((const float4x4*)&m)));
 	#endif
 }
+/**
+ * @brief Calculates 3x3 SIMD matrix transpose. (Flips upper-left 3x3 part over its diagonal, c3 column is kept as is)
+ * @param[in] m target SIMD matrix to transpose
+ */
+static simd_f32_4x4 transpose3x3(const simd_f32_4x4& m) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	auto zero = _mm_setzero_ps();
+	auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0));
+	auto t3 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2));
+	auto t2 = _mm_shuffle_ps(m.c2.data, zero, _MM_SHUFFLE(1, 0, 1, 0));
+	auto t4 = _mm_shuffle_ps(m.c2.data, zero, _MM_SHUFFLE(3, 2, 3, 2));
+	return simd_f32_4x4(
+		_mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)),
+		_mm_shuffle_ps(t1, t2, _MM_SHUFFLE(3, 1, 3, 1)),
+		_mm_shuffle_ps(t3, t4, _MM_SHUFFLE(2, 0, 2, 0)), m.c3);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	auto t1 = vzipq_f32(m.c0.data, m.c2.data);
+	auto t2 = vzipq_f32(m.c1.data, vdupq_n_f32(0));
+	auto t3 = vzipq_f32(t1.val[0], t2.val[0]);
+	auto t4 = vzipq_f32(t1.val[1], t2.val[1]);
+	return simd_f32_4x4(t3.val[0], t3.val[1], t4.val[0], m.c3);
+	#else
+	auto r = transpose((float3x3)m); // W lanes below are zero to match the SSE and NEON paths.
+	return simd_f32_4x4(
+		simd_f32_4(r.c0.x, r.c0.y, r.c0.z, 0.0f),
+		simd_f32_4(r.c1.x, r.c1.y, r.c1.z, 0.0f),
+		simd_f32_4(r.c2.x, r.c2.y, r.c2.z, 0.0f), m.c3);
+	#endif
+}
 
 /**
- * @brief Calculates 3x3 dot product between SIMD matrix and vector.
- * @param[int] m target SIMD matrix to use
- * @param v target SIMD vector to dot by
+ * @brief Calculates 4x4 SIMD matrix inverse. (Useful for undoing transformations)
+ * @param[in] m target SIMD matrix to invert
  */
-static simd_f32_4 multiply3x3(const simd_f32_4x4& m, simd_f32_4 v) noexcept
+static simd_f32_4x4 inverse4x4(const simd_f32_4x4& m) noexcept
 {
-	simd_f32_4 result;
 	#if defined(MATH_SIMD_SUPPORT_SSE)
-	auto r = _mm_mul_ps(m.columns[0].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)));
-	r = _mm_add_ps(r, _mm_mul_ps(m.columns[1].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1))));
-	r = _mm_add_ps(r, _mm_mul_ps(m.columns[2].data, _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2))));
-	result = r;
+	auto t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(1, 0, 1, 0));
+	auto r1 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(1, 0, 1, 0));
+	auto r0 = _mm_shuffle_ps(t1, r1, _MM_SHUFFLE(2, 0, 2, 0));
+	r1 = _mm_shuffle_ps(r1, t1, _MM_SHUFFLE(3, 1, 3, 1));
+	t1 = _mm_shuffle_ps(m.c0.data, m.c1.data, _MM_SHUFFLE(3, 2, 3, 2));
+	auto r3 = _mm_shuffle_ps(m.c2.data, m.c3.data, _MM_SHUFFLE(3, 2, 3, 2));
+	auto r2 = _mm_shuffle_ps(t1, r3, _MM_SHUFFLE(2, 0, 2, 0));
+	r3 = _mm_shuffle_ps(r3, t1, _MM_SHUFFLE(3, 1, 3, 1));
+
+	t1 = _mm_mul_ps(r2, r3);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	auto m0 = _mm_mul_ps(r1, t1);
+	auto m1 = _mm_mul_ps(r0, t1);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m0 = _mm_sub_ps(_mm_mul_ps(r1, t1), m0);
+	m1 = _mm_sub_ps(_mm_mul_ps(r0, t1), m1);
+	m1 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1, 0, 3, 2));
+
+	t1 = _mm_mul_ps(r1, r2);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	m0 = _mm_add_ps(_mm_mul_ps(r3, t1), m0);
+	auto m3 = _mm_mul_ps(r0, t1);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m0 = _mm_sub_ps(m0, _mm_mul_ps(r3, t1));
+	m3 = _mm_sub_ps(_mm_mul_ps(r0, t1), m3);
+	m3 = _mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1, 0, 3, 2));
+
+	t1 = _mm_mul_ps(_mm_shuffle_ps(r1, r1, _MM_SHUFFLE(1, 0, 3, 2)), r3);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	r2 = _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(1, 0, 3, 2));
+	m0 = _mm_add_ps(_mm_mul_ps(r2, t1), m0);
+	auto m2 = _mm_mul_ps(r0, t1);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m0 = _mm_sub_ps(m0, _mm_mul_ps(r2, t1));
+	m2 = _mm_sub_ps(_mm_mul_ps(r0, t1), m2);
+	m2 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1, 0, 3, 2));
+
+	t1 = _mm_mul_ps(r0, r1);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	m2 = _mm_add_ps(_mm_mul_ps(r3, t1), m2);
+	m3 = _mm_sub_ps(_mm_mul_ps(r2, t1), m3);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m2 = _mm_sub_ps(_mm_mul_ps(r3, t1), m2);
+	m3 = _mm_sub_ps(m3, _mm_mul_ps(r2, t1));
+
+	t1 = _mm_mul_ps(r0, r3);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	m1 = _mm_sub_ps(m1, _mm_mul_ps(r2, t1));
+	m2 = _mm_add_ps(_mm_mul_ps(r1, t1), m2);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m1 = _mm_add_ps(_mm_mul_ps(r2, t1), m1);
+	m2 = _mm_sub_ps(m2, _mm_mul_ps(r1, t1));
+
+	t1 = _mm_mul_ps(r0, r2);
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 3, 0, 1));
+	m1 = _mm_add_ps(_mm_mul_ps(r3, t1), m1);
+	m3 = _mm_sub_ps(m3, _mm_mul_ps(r1, t1));
+	t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 0, 3, 2));
+	m1 = _mm_sub_ps(m1, _mm_mul_ps(r3, t1));
+	m3 = _mm_add_ps(_mm_mul_ps(r1, t1), m3);
+
+	auto d = _mm_mul_ps(r0, m0);
+	d = _mm_add_ps(_mm_shuffle_ps(d, d, _MM_SHUFFLE(2, 3, 0, 1)), d);
+	d = _mm_add_ss(_mm_shuffle_ps(d, d, _MM_SHUFFLE(1, 0, 3, 2)), d);
+	d = _mm_div_ss(_mm_set_ss(1.0f), d);
+	d = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0, 0, 0, 0));
+
+	return simd_f32_4x4(_mm_mul_ps(d, m0), _mm_mul_ps(d, m1), 
+		_mm_mul_ps(d, m2), _mm_mul_ps(d, m3));
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
-	auto r = vmulq_f32(m.columns[0].data, vdupq_laneq_f32(v.data, 0));
-	r = vmlaq_f32(r, m.columns[1].data, vdupq_laneq_f32(v.data, 1));
-	r = vmlaq_f32(r, m.columns[2].data, vdupq_laneq_f32(v.data, 2));
-	result = r;
+	auto t1 = __builtin_shufflevector(m.c0.data, m.c1.data, 0, 1, 4, 5);
+	auto r1 = __builtin_shufflevector(m.c2.data, m.c3.data, 0, 1, 4, 5);
+	auto r0 = __builtin_shufflevector(t1, r1, 0, 2, 4, 6);
+	r1 = __builtin_shufflevector(r1, t1, 1, 3, 5, 7);
+	t1 = __builtin_shufflevector(m.c0.data, m.c1.data, 2, 3, 6, 7);
+	auto r3 = __builtin_shufflevector(m.c2.data, m.c3.data, 2, 3, 6, 7);
+	auto r2 = __builtin_shufflevector(t1, r3, 0, 2, 4, 6);
+	r3 = __builtin_shufflevector(r3, t1, 1, 3, 5, 7);
+
+	t1 = vmulq_f32(r2, r3);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	auto m0 = vmulq_f32(r1, t1);
+	auto m1 = vmulq_f32(r0, t1);
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m0 = vsubq_f32(vmulq_f32(r1, t1), m0);
+	m1 = vsubq_f32(vmulq_f32(r0, t1), m1);
+	m1 = __builtin_shufflevector(m1, m1, 2, 3, 0, 1);
+
+	t1 = vmulq_f32(r1, r2);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	m0 = vaddq_f32(vmulq_f32(r3, t1), m0);
+	auto m3 = vmulq_f32(r0, t1);
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m0 = vsubq_f32(m0, vmulq_f32(r3, t1));
+	m3 = vsubq_f32(vmulq_f32(r0, t1), m3);
+	m3 = __builtin_shufflevector(m3, m3, 2, 3, 0, 1);
+
+	t1 = __builtin_shufflevector(r1, r1, 2, 3, 0, 1);
+	t1 = vmulq_f32(t1, r3);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	r2 = __builtin_shufflevector(r2, r2, 2, 3, 0, 1);
+	m0 = vaddq_f32(vmulq_f32(r2, t1), m0);
+	auto m2 = vmulq_f32(r0, t1);
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m0 = vsubq_f32(m0, vmulq_f32(r2, t1));
+	m2 = vsubq_f32(vmulq_f32(r0, t1), m2);
+	m2 = __builtin_shufflevector(m2, m2, 2, 3, 0, 1);
+
+	t1 = vmulq_f32(r0, r1);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	m2 = vaddq_f32(vmulq_f32(r3, t1), m2);
+	m3 = vsubq_f32(vmulq_f32(r2, t1), m3);
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m2 = vsubq_f32(vmulq_f32(r3, t1), m2);
+	m3 = vsubq_f32(m3, vmulq_f32(r2, t1));
+
+	t1 = vmulq_f32(r0, r3);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	m1 = vsubq_f32(m1, vmulq_f32(r2, t1));
+	m2 = vaddq_f32(vmulq_f32(r1, t1), m2);
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m1 = vaddq_f32(vmulq_f32(r2, t1), m1);
+	m2 = vsubq_f32(m2, vmulq_f32(r1, t1));
+
+	t1 = vmulq_f32(r0, r2);
+	t1 = __builtin_shufflevector(t1, t1, 1, 0, 3, 2);
+	m1 = vaddq_f32(vmulq_f32(r3, t1), m1);
+	m3 = vsubq_f32(m3, vmulq_f32(r1, t1));
+	t1 = __builtin_shufflevector(t1, t1, 2, 3, 0, 1);
+	m1 = vsubq_f32(m1, vmulq_f32(r3, t1));
+	m3 = vaddq_f32(vmulq_f32(r1, t1), m3);
+
+	auto d = vmulq_f32(r0, m0);
+	d = vdupq_n_f32(vaddvq_f32(d));
+	d = vdivq_f32(vdupq_n_f32(1.0f), d);
+
+	return simd_f32_4x4(vmulq_f32(d, m0), vmulq_f32(d, m1), 
+		vmulq_f32(d, m2), vmulq_f32(d, m3));
 	#else
-	auto t = float3x3((float3)m.columns[0], (float3)m.columns[1], (float3)m.columns[2]) * (float3)v;
-	result = simd_f32_4(t.x, t.y, t.z, m.w);
+	return simd_f32_4x4(inverse(*((const float4x4*)&m)));
 	#endif
-	return result.swizzle<SwX, SwY, SwZ, SwZ>();
 }
 
+// TODO: inverse of the 3x3 matrix like in the Jolt lib.
+
 } // namespace math
\ No newline at end of file
diff --git a/include/math/simd/matrix/transform.hpp b/include/math/simd/matrix/transform.hpp
index fe812a9..7cb86d6 100644
--- a/include/math/simd/matrix/transform.hpp
+++ b/include/math/simd/matrix/transform.hpp
@@ -25,26 +25,26 @@ namespace math
 {
 
 /**
- * @brief Returns total matrix translation transformation of an object in 3D space.
- * @param[in] m target model matrix to extract from
+ * @brief Returns total SIMD matrix translation transformation of an object in 3D space.
+ * @param[in] m target model SIMD matrix to extract from
  */
 static simd_f32_4 getTranslation(const simd_f32_4x4& m) noexcept
 {
-	return m.columns[3];
+	return m.c3;
 }
 /**
- * @brief Sets total matrix translation transformation of an object in 3D space.
+ * @brief Sets total SIMD matrix translation transformation of an object in 3D space.
  * 
- * @param[out] m target model matrix to set
+ * @param[out] m target model SIMD matrix to set
  * @param t target object position
  */
 static void setTranslation(simd_f32_4x4& m, simd_f32_4 t) noexcept
 {
-	m.columns[3] = simd_f32_4(t, m.columns[3].getW());
+	m.c3 = simd_f32_4(t, m.c3.getW());
 }
 
 /**
- * @brief Creates a new matrix with a specified object position in 3D space.
+ * @brief Creates a new SIMD matrix with a specified object position in 3D space.
  * @param t target object translation
  */
 static simd_f32_4x4 translate(simd_f32_4 t) noexcept
@@ -60,8 +60,7 @@ static simd_f32_4x4 translate(simd_f32_4 t) noexcept
  */
 static simd_f32_4x4 translate(const simd_f32_4x4& m, simd_f32_4 t) noexcept
 {
-	return simd_f32_4x4(m.columns[0], m.columns[1], m.columns[2], 
-		simd_f32_4(getTranslation(m) + multiply3x3(m, t), m.columns[3].getW()));
+	return simd_f32_4x4(m.c0, m.c1, m.c2, simd_f32_4(m.c3 + multiply3x3(m, t), m.c3.getW()));
 }
 /**
  * @brief Applies translation transformation to an object in 3D space. [r = translate(t) * m]
@@ -71,12 +70,11 @@ static simd_f32_4x4 translate(const simd_f32_4x4& m, simd_f32_4 t) noexcept
  */
 static simd_f32_4x4 translate(simd_f32_4 t, const simd_f32_4x4& m) noexcept
 {
-	return simd_f32_4x4(m.columns[0], m.columns[1], m.columns[2], 
-		simd_f32_4(getTranslation(m) + t, m.columns[3].getW()));
+	return simd_f32_4x4(m.c0, m.c1, m.c2, simd_f32_4(m.c3 + t, m.c3.getW()));
 }
 
 /***********************************************************************************************************************
- * @brief Creates a new matrix with a specified object scale in 3D space.
+ * @brief Creates a new SIMD matrix with a specified object scale in 3D space.
  * @param s target object scale
  */
 static simd_f32_4x4 scale(simd_f32_4 s) noexcept
@@ -88,122 +86,130 @@ static simd_f32_4x4 scale(simd_f32_4 s) noexcept
 		simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 /**
- * @brief Applies scale transformation to an object in 3D space. [r = scale(s) * m]
+ * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)]
  *
- * @param[in] m target model matrix to scale
+ * @param[in] m target model SIMD matrix to scale
  * @param s target object scale
  */
-static simd_f32_4x4 scale(simd_f32_4 s, const simd_f32_4x4& m) noexcept
+static simd_f32_4x4 scale(const simd_f32_4x4& m, simd_f32_4 s) noexcept
 {
-	auto sm = scale(s);
-	return simd_f32_4x4(sm * m.columns[0], sm * m.columns[1], sm * m.columns[2], sm * m.columns[3]);
+	return simd_f32_4x4(m.c0 * simd_f32_4(s.getX()), 
+		m.c1 * simd_f32_4(s.getY()), m.c2 * simd_f32_4(s.getZ()), m.c3);
 }
 /**
- * @brief Applies scale transformation to an object in 3D space. [r = m * scale(s)]
+ * @brief Applies scale transformation to an object in 3D space. [r = scale(s) * m]
  *
- * @param[in] m target model matrix to scale
+ * @param[in] m target model SIMD matrix to scale
  * @param s target object scale
  */
-static simd_f32_4x4 scale(const simd_f32_4x4& m, simd_f32_4 s) noexcept
+static simd_f32_4x4 scale(simd_f32_4 s, const simd_f32_4x4& m) noexcept
 {
-	return simd_f32_4x4(m.columns[0] * simd_f32_4(s.getX()), 
-		m.columns[1] * simd_f32_4(s.getY()), m.columns[2] * simd_f32_4(s.getZ()), m.columns[3]);
+	auto sm = scale(s);
+	return simd_f32_4x4(sm * m.c0, sm * m.c1, sm * m.c2, sm * m.c3);
 }
 
 /**
- * @brief Extracts total matrix scale transformation of an object in 3D space.
- * @param[in] m target model matrix to extract from
+ * @brief Extracts total SIMD matrix scale transformation of an object in 3D space.
+ * @param[in] m target model SIMD matrix to extract from
  */
 static simd_f32_4 extractScale(const simd_f32_4x4& m) noexcept
 {
-	return simd_f32_4(length(m.c0), length((float3)m.c1), length((float3)m.c2));
+	return simd_f32_4(length3(m.c0), length3(m.c1), length3(m.c2)); // Scale is the basis length, not its square.
 }
 /**
- * @brief Extracts total matrix 2D scale transformation of an object in 3D space.
- * @param[in] m target model matrix to extract from
+ * @brief Extracts total SIMD matrix 2D scale transformation of an object in 3D space.
+ * @param[in] m target model SIMD matrix to extract from
  */
 static float2 extractScale2(const simd_f32_4x4& m) noexcept
 {
-	return float2(length((float3)m.c0), length((float3)m.c1));
+	return float2(length3(m.c0), length3(m.c1)); // Both axes use the plain 3D length, as the replaced code did.
 }
 
 /***********************************************************************************************************************
- * @brief Creates a new matrix with a specified object rotation in 3D space.
+ * @brief Creates a new SIMD matrix with a specified object rotation in 3D space.
  * @param q target object rotation
  */
-static simd_f32_4x4 rotate(quat q) noexcept
+static simd_f32_4x4 rotate(simd_f32_quat q) noexcept
 {
-	auto xx = q.x * q.x, yy = q.y * q.y, zz = q.z * q.z;
-	auto xz = q.x * q.z, xy = q.x * q.y, yz = q.y * q.z;
-	auto wx = q.w * q.x, wy = q.w * q.y, wz = q.w * q.z;
+	auto x = q.getX(), y = q.getY(), z = q.getZ(), w = q.getW();
+	auto xx = x * x, yy = y * y, zz = z * z;
+	auto xz = x * z, xy = x * y, yz = y * z;
+	auto wx = w * x, wy = w * y, wz = w * z;
 
-	return float4x4(
-		1.0f - 2.0f * (yy + zz), 2.0f * (xy - wz), 2.0f * (xz + wy), 0.0f,
-		2.0f * (xy + wz), 1.0f - 2.0f * (xx + zz), 2.0f * (yz - wx), 0.0f,
-		2.0f * (xz - wy), 2.0f * (yz + wx), 1.0f - 2.0f * (xx + yy), 0.0f,
-		0.0f, 0.0f, 0.0f, 1.0f);
+	return simd_f32_4x4(
+		simd_f32_4(1.0f - 2.0f * (yy + zz), 2.0f * (xy - wz), 2.0f * (xz + wy), 0.0f),
+		simd_f32_4(2.0f * (xy + wz), 1.0f - 2.0f * (xx + zz), 2.0f * (yz - wx), 0.0f),
+		simd_f32_4(2.0f * (xz - wy), 2.0f * (yz + wx), 1.0f - 2.0f * (xx + yy), 0.0f),
+		simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 /**
- * @brief Calculates a new rotation matrix from look vectors in 3D space.
+ * @brief Calculates a new rotation SIMD matrix from look vectors in 3D space.
  *
- * @param[in] lookFrom viewer position in 3D space
- * @param[in] lookTo object position in 3D space
- * @param[in] up space up direction vector
+ * @param lookFrom viewer position in 3D space
+ * @param lookTo object position in 3D space
+ * @param up space up direction vector
  */
-static simd_f32_4x4 rotate(const float3& lookFrom, const float3& lookTo, const float3& up = float3::top) noexcept
+static simd_f32_4x4 rotate(simd_f32_4 lookFrom, simd_f32_4 lookTo, 
+	simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept
 {
-	auto f = normalize(lookTo - lookFrom);
-	auto s = normalize(cross(up, f));
-	auto u = cross(f, s);
+	auto f = normalize3(lookTo - lookFrom);
+	auto s = normalize3(cross3(up, f));
+	auto u = cross3(f, s);
 
-	return float4x4(
-		s.x, s.y, s.z, 0.0f,
-		u.x, u.y, u.z, 0.0f,
-		f.x, f.y, f.z, 0.0f,
-		0.0f, 0.0f, 0.0f, 1.0f);
+	return simd_f32_4x4(simd_f32_4(s, 0.0f), simd_f32_4(u, 0.0f), 
+		simd_f32_4(f, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 /**
- * @brief Calculates a new matrix from direction vector in 3D space.
+ * @brief Calculates a new SIMD matrix from direction vector in 3D space.
  *
- * @param[in] front view direction vector in 3D space
- * @param[in] up space up direction vector
+ * @param front view direction vector in 3D space
+ * @param up space up direction vector
  */
-static simd_f32_4x4 rotate(const float3& front, const float3& up = float3::top) noexcept
+static simd_f32_4x4 rotate(simd_f32_4 front, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept
 {
-	auto f = front;
-	auto s = normalize(cross(up, f));
-	auto u = cross(f, s);
+	auto s = normalize3(cross3(up, front));
+	auto u = cross3(front, s);
 
-	return float4x4(
-		s.x, s.y, s.z, 0.0f,
-		u.x, u.y, u.z, 0.0f,
-		f.x, f.y, f.z, 0.0f,
-		0.0f, 0.0f, 0.0f, 1.0f);
+	return simd_f32_4x4(simd_f32_4(s, 0.0f), simd_f32_4(u, 0.0f), 
+		simd_f32_4(front, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 
 /**
- * @brief Extracts total matrix rotation transformation of an object in 3D space.
- * @param[in] m target model matrix to extract from
+ * @brief Extracts total SIMD matrix rotation transformation of an object in 3D space.
+ * @param[in] m target model SIMD matrix to extract from
  */
 static simd_f32_4x4 extractRotation(const simd_f32_4x4& m) noexcept
 {
 	auto invScale = 1.0f / extractScale(m);
-	auto c0 = (float3)m.c0 * invScale.x;
-	auto c1 = (float3)m.c1 * invScale.y;
-	auto c2 = (float3)m.c2 * invScale.z;
-	return float4x4(float4(c0, 0.0f), float4(c1, 0.0f), float4(c2, 0.0f), float4(0.0f, 0.0f, 0.0f, 1.0f));
+	auto c0 = m.c0 * invScale.getX();
+	auto c1 = m.c1 * invScale.getY();
+	auto c2 = m.c2 * invScale.getZ();
+	
+	return simd_f32_4x4(simd_f32_4(c0, 0.0f), simd_f32_4(c1, 0.0f), 
+		simd_f32_4(c2, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
+}
+/**
+ * @brief Extracts total SIMD matrix rotation transformation of an object in 3D space.
+ * @warning Matrix should be only translated and/or rotated, without scaling!!!
+ * @param[in] m target model SIMD matrix to extract from
+ */
+static simd_f32_4x4 extractRotationOnly(const simd_f32_4x4& m) noexcept
+{
+	return simd_f32_4x4(simd_f32_4(m.c0, 0.0f), simd_f32_4(m.c1, 0.0f), 
+		simd_f32_4(m.c2, 0.0f), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 
 /**
- * @brief Extracts total matrix rotation quaternion of an object in 3D space.
- * @param[in] m target rotation matrix to extract from
+ * @brief Extracts total SIMD matrix rotation quaternion of an object in 3D space.
+ * @param m target rotation SIMD matrix to extract from
  */
-static quat extractQuat(const float3x3& m) noexcept
+static simd_f32_quat extractQuat(simd_f32_4x4 m) noexcept
 {
-	auto fourXSquaredMinus1 = m.c0.x - m.c1.y - m.c2.z;
-	auto fourYSquaredMinus1 = m.c1.y - m.c0.x - m.c2.z;
-	auto fourZSquaredMinus1 = m.c2.z - m.c0.x - m.c1.y;
-	auto fourWSquaredMinus1 = m.c0.x + m.c1.y + m.c2.z;
+	auto c0X = m.c0.getX(), c1Y = m.c1.getY(), c2Z = m.c2.getZ();
+	auto fourXSquaredMinus1 = c0X - c1Y - c2Z;
+	auto fourYSquaredMinus1 = c1Y - c0X - c2Z;
+	auto fourZSquaredMinus1 = c2Z - c0X - c1Y;
+	auto fourWSquaredMinus1 = c0X + c1Y + c2Z;
 
 	int biggestIndex = 0;
 	auto fourBiggestSquaredMinus1 = fourWSquaredMinus1;
@@ -228,80 +234,100 @@ static quat extractQuat(const float3x3& m) noexcept
 
 	switch (biggestIndex)
 	{
-	case 0: return quat((m.c1.z - m.c2.y) * mult, (m.c2.x - m.c0.z) * mult, (m.c0.y - m.c1.x) * mult, biggestVal);
-	case 1: return quat(biggestVal, (m.c0.y + m.c1.x) * mult, (m.c2.x + m.c0.z) * mult, (m.c1.z - m.c2.y) * mult);
-	case 2: return quat((m.c0.y + m.c1.x) * mult, biggestVal, (m.c1.z + m.c2.y) * mult, (m.c2.x - m.c0.z) * mult);
-	case 3: return quat((m.c2.x + m.c0.z) * mult, (m.c1.z + m.c2.y) * mult, biggestVal, (m.c0.y - m.c1.x) * mult);
+	case 0: return simd_f32_quat((m.c1.getZ() - m.c2.getY()) * mult, 
+		(m.c2.getX() - m.c0.getZ()) * mult, (m.c0.getY() - m.c1.getX()) * mult, biggestVal);
+	case 1: return simd_f32_quat(biggestVal, (m.c0.getY() + m.c1.getX()) * mult, 
+		(m.c2.getX() + m.c0.getZ()) * mult, (m.c1.getZ() - m.c2.getY()) * mult);
+	case 2: return simd_f32_quat((m.c0.getY() + m.c1.getX()) * mult, biggestVal, 
+		(m.c1.getZ() + m.c2.getY()) * mult, (m.c2.getX() - m.c0.getZ()) * mult);
+	case 3: return simd_f32_quat((m.c2.getX() + m.c0.getZ()) * mult, 
+		(m.c1.getZ() + m.c2.getY()) * mult, biggestVal, (m.c0.getY() - m.c1.getX()) * mult);
 	default: abort();
 	}
 }
-/**
- * @brief Extracts total matrix rotation quaternion of an object in 3D space.
- * @param[in] m target rotation matrix to extract from
- */
-static quat extractQuat(const simd_f32_4x4& m) noexcept
-{
-	return extractQuat((float3x3)m);
-}
 
 /***********************************************************************************************************************
- * @brief Calculates object model matrix from it position, rotation and scale.
+ * @brief Calculates object model SIMD matrix from its position, rotation and scale.
  *
- * @param[in] position object position in 3D space
- * @param[in] rotation object rotation in 3D space
- * @param[in] scale object scale in 3D space
+ * @param position object position in 3D space
+ * @param rotation object rotation in 3D space
+ * @param scale object scale in 3D space
  */
-static simd_f32_4x4 calcModel(const float3& position = float3::zero,
-	const quat& rotation = quat::identity, const float3& scale = float3::one) noexcept
+static simd_f32_4x4 calcModel(simd_f32_4 position = simd_f32_4::zero,
+	simd_f32_quat rotation = simd_f32_quat::identity, simd_f32_4 scale = simd_f32_4::one) noexcept
 {
-	return translate(position) * rotate(normalize(rotation)) * math::scale(scale);
+	return scale == simd_f32_4::one ? translate(position, rotate(normalize3(rotation))) :
+		translate(position) * rotate(normalize3(rotation)) * math::scale(scale);
 }
 /**
- * @brief Extracts total matrix position, rotation and scale of an object in 3D space. (Decompose)
+ * @brief Extracts total SIMD matrix position, rotation and scale of an object in 3D space. (Decompose)
  *
- * @param[in] m target model matrix to extract from
+ * @param[in] m target model SIMD matrix to extract from
  * @param[out] position object position in 3D space
- * @param[out] scale object scale in 3D space
  * @param[out] rotation object rotation in 3D space
+ * @param[out] scale object scale in 3D space
  */
-static void extractTransform(const simd_f32_4x4& m, float3& position, float3& scale, quat& rotation) noexcept
+static void extractTransform(const simd_f32_4x4& m, simd_f32_4& position, 
+	simd_f32_quat& rotation, simd_f32_4& scale) noexcept
 {
 	position = getTranslation(m);
-	scale = extractScale(m);
 	rotation = extractQuat(extractRotation(m));
+	scale = extractScale(m);
+}
+/**
+ * @brief Extracts total SIMD matrix position and rotation of an object in 3D space. (Decompose, matrix must be unscaled)
+ *
+ * @param[in] m target model SIMD matrix to extract from
+ * @param[out] position object position in 3D space
+ * @param[out] rotation object rotation in 3D space
+ */
+static void extractTransform(const simd_f32_4x4& m, simd_f32_4& position, simd_f32_quat& rotation) noexcept
+{
+	position = getTranslation(m);
+	rotation = extractQuat(extractRotationOnly(m)); // No scale removal here, columns are used as the rotation basis.
 }
 
 /***********************************************************************************************************************
- * @brief Calculates a new model matrix from look vectors in 3D space.
+ * @brief Calculates a new SIMD model matrix from look vectors in 3D space.
  *
  * @param[in] from viewer position in 3D space
  * @param[in] to object position in 3D space
  * @param[in] up space up direction vector
  */
-static simd_f32_4x4 lookAt(const float3& from, const float3& to, const float3& up = float3::top) noexcept
+static simd_f32_4x4 lookAt(simd_f32_4 from, simd_f32_4 to, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept
 {
-	auto f = normalize(to - from);
-	auto s = normalize(cross(up, f));
-	auto u = cross(f, s);
-	return simd_f32_4x4(
-		s.x, s.y, s.z, -dot(s, from),
-		u.x, u.y, u.z, -dot(u, from),
-		f.x, f.y, f.z, -dot(f, from),
-		0.0f, 0.0f, 0.0f, 1.0f);
+	auto f = normalize3(to - from);
+	auto s = normalize3(cross3(up, f));
+	auto u = cross3(f, s);
+
+	return simd_f32_4x4(simd_f32_4(s, -dot3(s, from)), simd_f32_4(u, -dot3(u, from)), 
+		simd_f32_4(f, -dot3(f, from)), simd_f32_4(0.0f, 0.0f, 0.0f, 1.0f));
 }
 /**
- * @brief Calculates a new quaternion from direction vector in 3D space.
+ * @brief Calculates a new SIMD quaternion from direction vector in 3D space.
  *
- * @param[in] direction viewer direction in 3D space
- * @param[in] up space up direction vector
+ * @param direction viewer direction in 3D space
+ * @param up space up direction vector
  */
-static quat lookAtQuat(const float3& direction, const float3& up = float3::top) noexcept
+static simd_f32_quat lookAtQuat(simd_f32_4 direction, simd_f32_4 up = simd_f32_4(0.0f, 1.0f, 0.0f, 0.0f)) noexcept
 {
-	auto right = cross(up, direction);
-	auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot(right, right))));
-	auto c1 = cross(right, c0);
-	auto m = float3x3(c0, c1, direction);
+	auto right = cross3(up, direction);
+	auto c0 = right * (1.0f / std::sqrt(std::max(0.00001f, dot3(right, right))));
+	auto c1 = cross3(right, c0);
+	auto m = simd_f32_4x4(c0, c1, direction);
 	return extractQuat(m);
 }
 
+/**
+ * @brief Calculates translated/rotated SIMD matrix inverse. (Transposes rotation, then negates the rotated translation)
+ * @warning Matrix should be only translated and/or rotated, without scaling!!!
+ * @param[in] m target SIMD matrix to invert
+ */
+static simd_f32_4x4 inverseTransRot(const simd_f32_4x4& m) noexcept
+{
+	auto t = transpose3x3(m); // Only the 3x3 part is transposed, a 4x4 transpose moves translation into the W lanes.
+	setTranslation(t, -multiply3x3(t, getTranslation(m)));
+	return t;
+}
+
 } // namespace math
\ No newline at end of file
diff --git a/include/math/simd/quaternion.hpp b/include/math/simd/quaternion.hpp
new file mode 100644
index 0000000..38b2403
--- /dev/null
+++ b/include/math/simd/quaternion.hpp
@@ -0,0 +1,197 @@
+// Copyright 2022-2025 Nikita Fediuchin. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/***********************************************************************************************************************
+ * @file
+ * @brief Common single instruction multiple data (SIMD) floating point quaternion functions.
+ */
+
+#pragma once
+#include "math/simd/vector/float.hpp"
+
+namespace math
+{
+
+/**
+ * @brief SIMD quaternion rotation container. (In 3D space)
+ * @details Represents rotation using complex numbers, avoiding gimbal lock problem.
+ */
+struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_quat : public simd_f32_4
+{
+	/**
+	 * @brief Creates a new SIMD quaternion structure from the parts vector. (Identity by default)
+	 * @note Identity quaternion represents rotation of 0 degrees around all axes.
+	 * @param xyzw quaternion parts vector
+	 */
+	simd_f32_quat(simd_f32_4 xyzw = identity) noexcept : simd_f32_4(xyzw) { }
+	/**
+	 * @brief Creates a new SIMD quaternion structure.
+	 * 
+	 * @param x imaginary vector X part
+	 * @param y imaginary vector Y part
+	 * @param z imaginary vector Z part
+	 * @param w real scalar part
+	 */
+	simd_f32_quat(float x, float y, float z, float w) noexcept : simd_f32_4(x, y, z, w) { }
+	
+	/**
+	 * @brief Creates a new SIMD quaternion structure from the angle and axis. (In radians)
+	 * 
+	 * @param angle target rotation around axis in radians
+	 * @param axis target 3D axis to rotate around
+	 */
+	simd_f32_quat(float angle, simd_f32_4 axis) noexcept
+	{
+		simd_f32_4 s, c;
+		sinCos(simd_f32_4(angle * 0.5f), s, c);
+		*this = select(simd_u32_4(0, 0, 0, UINT32_MAX), c, axis * s); // W = cos(angle / 2), XYZ = axis * sin(angle / 2)
+	}
+
+	/**
+	 * @brief Rotates this SIMD quaternion by the specified one.
+	 * @param q target SIMD quaternion to rotate by
+	 */
+	simd_f32_quat operator*(simd_f32_quat q) const noexcept
+	{
+		auto x = getX(), y = getY(), z = getZ(), w = getW();
+		auto qX = q.getX(), qY = q.getY(), qZ = q.getZ(), qW = q.getW();
+		return simd_f32_quat(
+			w * qX + x * qW + y * qZ - z * qY,
+			w * qY + y * qW + z * qX - x * qZ,
+			w * qZ + z * qW + x * qY - y * qX,
+			w * qW - x * qX - y * qY - z * qZ);
+	}
+	/**
+	 * @brief Rotates 3D vector by this SIMD quaternion.
+	 * @param v target 3D vector to rotate
+	 */
+	simd_f32_4 operator*(simd_f32_4 v) const noexcept
+	{
+		auto cq = cross3(*this, v), ccq = cross3(*this, cq);
+		return fma(fma(cq, simd_f32_4(getW()), ccq), simd_f32_4(2.0f), v);
+	}
+	/**
+	 * @brief Rotates this SIMD quaternion by the specified one. (Quaternion product, not vector rotation)
+	 * @param q target SIMD quaternion to rotate by
+	 */
+	simd_f32_quat& operator*=(simd_f32_quat q) noexcept { return *this = *this * q; }
+
+	/**
+	 * @brief Extracts SIMD quaternion rotation around X axis. (In radians)
+	 */
+	float extractPitch() const noexcept
+	{
+		auto x = getX(), y = getY(), z = getZ(), w = getW();
+		auto yy = 2.0f * (y * z + w * x);
+		auto xx = w * w - x * x - y * y + z * z;
+
+		// Avoid atan2(0, 0) - the singularity occurs only when both terms vanish - Matiis
+		if (std::abs(xx) <= FLT_EPSILON && std::abs(yy) <= FLT_EPSILON)
+			return 2.0f * std::atan2(x, w);
+		return std::atan2(yy, xx);
+	}
+	/**
+	 * @brief Extracts SIMD quaternion rotation around Y axis. (In radians)
+	 */
+	float extractYaw() const noexcept
+	{
+		return std::asin(std::clamp(-2.0f * (getX() * getZ() - getW() * getY()), -1.0f, 1.0f));
+	}
+	/**
+	 * @brief Extracts SIMD quaternion rotation around Z axis. (In radians)
+	 */
+	float extractRoll() const noexcept
+	{
+		auto x = getX(), y = getY(), z = getZ(), w = getW();
+		return std::atan2(2.0f * (x * y + w * z), w * w + x * x - y * y - z * z);
+	}
+	/**
+	 * @brief Calculates SIMD quaternion euler angles as pitch, yaw, roll vector. (In radians)
+	 */
+	simd_f32_4 extractEulerAngles() const noexcept { return simd_f32_4(extractPitch(), extractYaw(), extractRoll()); }
+
+	static const simd_f32_quat identity;
+};
+
+//**********************************************************************************************************************
+inline const simd_f32_quat simd_f32_quat::identity = simd_f32_quat(0.0f, 0.0f, 0.0f, 1.0f);
+
+/**
+ * @brief Builds a SIMD quaternion structure out of the euler angles vector. (In radians)
+ * @param eulerAngles target 3D euler angles SIMD vector
+ */
+static simd_f32_quat fromEulerAngles(simd_f32_4 eulerAngles) noexcept
+{
+	simd_f32_4 sine, cosine;
+	sinCos(eulerAngles * 0.5f, sine, cosine);
+	auto sinX = sine.getX(), sinY = sine.getY(), sinZ = sine.getZ();
+	auto cosX = cosine.getX(), cosY = cosine.getY(), cosZ = cosine.getZ();
+	return simd_f32_quat(
+		sinX * cosY * cosZ - cosX * sinY * sinZ,
+		cosX * sinY * cosZ + sinX * cosY * sinZ,
+		cosX * cosY * sinZ - sinX * sinY * cosZ,
+		cosX * cosY * cosZ + sinX * sinY * sinZ);
+}
+
+/**
+ * @brief Spherically interpolates between two SIMD quaternions. (From a to b)
+ * 
+ * @param a starting SIMD quaternion value
+ * @param b target SIMD quaternion value
+ * @param t interpolation value (from 0.0 to 1.0)
+ */
+static simd_f32_quat slerp(simd_f32_quat a, simd_f32_quat b, float t) noexcept
+{
+	auto cosTheta = dot4(a, b);
+	auto c = (simd_f32_4)b;
+	// Negative dot product: interpolate towards the negated target instead.
+	if (cosTheta < 0.0f)
+	{
+		c = -b;
+		cosTheta = -cosTheta;
+	}
+
+	// Almost parallel: sin(angle) below would be near zero, so use linear interpolation.
+	if (cosTheta > 1.0f - FLT_EPSILON)
+		return lerp(a, c, t);
+
+	auto angle = std::acos(cosTheta);
+	auto weightA = std::sin((1.0f - t) * angle), weightB = std::sin(t * angle);
+	return ((simd_f32_4)a * weightA + c * weightB) / std::sin(angle);
+}
+
+/**
+ * @brief Conjugates SIMD quaternion by flipping the X, Y, Z sign bits. (Undoes rotation of a unit quaternion)
+ * @param q target quaternion to conjugate
+ */
+static simd_f32_quat conjugate(simd_f32_quat q) noexcept
+{
+	static const auto mask = floatAsUint(simd_f32_4(-0.0f, -0.0f, -0.0f, 0.0f)); // Sign bit set in X, Y, Z lanes only.
+	return uintAsFloat(floatAsUint(q) ^ mask);
+}
+/**
+ * @brief Calculates inverse of the SIMD quaternion. (Conjugate divided by the 4D dot product of q with itself)
+ * @param q target quaternion to inverse
+ */
+static simd_f32_quat inverse(simd_f32_quat q) noexcept { return conjugate(q) / dot4(q, q);  }
+
+/**
+ * @brief Rotates 3D vector by the inverse of the SIMD quaternion. (Same as inverse(q) * v)
+ * 
+ * @param v target 3D vector to rotate
+ * @param q target SIMD quaternion to use
+ */
+static simd_f32_4 operator*(simd_f32_4 v, simd_f32_quat q) noexcept { return inverse(q) * v; }
+
+} // namespace math
\ No newline at end of file
diff --git a/include/math/simd/types.hpp b/include/math/simd/types.hpp
index c5e83f4..fbc6070 100644
--- a/include/math/simd/types.hpp
+++ b/include/math/simd/types.hpp
@@ -18,7 +18,6 @@
  */
 
 #pragma once
-#include "math/vector/float.hpp"
 
 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
 
diff --git a/include/math/simd/vector/float.hpp b/include/math/simd/vector/float.hpp
index 4f55a99..42a9207 100644
--- a/include/math/simd/vector/float.hpp
+++ b/include/math/simd/vector/float.hpp
@@ -26,15 +26,17 @@ namespace math
 
 /**
  * @brief SIMD 4 component 32bit floating point vector structure.
+ * @details Commonly used to represent: points, positions, directions, velocities, etc.
  * @note Use it when you know how to implement a faster vectorized code.
  */
 struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 {
 	union
 	{
-		_simd_f128 data; /** Packed SIMD 4 component 32bit floating point vector data. */
-		float4 vec;
-
+		_simd_f128 data;
+		float4 floats;
+		int4 ints;
+		uint4 uints;
 	};
 
 	simd_f32_4(const simd_f32_4& v) = default;
@@ -50,7 +52,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_f32(0.0f);
 		#else
-		vec = float4::zero;
+		floats = float4::zero;
 		#endif
 	}
 	/**
@@ -64,7 +66,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_f32(xyzw);
 		#else
-		vec = float4(xyzw);
+		floats = float4(xyzw);
 		#endif
 	}
 	/**
@@ -82,7 +84,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { x, y, z, w };
 		#else
-		vec = float4(x, y, z, w);
+		floats = float4(x, y, z, w);
+		#endif
+	}
+	/**
+	 * @brief Creates a new SIMD 4 component 32bit floating point vector structure.
+	 * @warning This constructor duplicates Z component to the W component!
+	 *
+	 * @param x first vector component value
+	 * @param y second vector component value
+	 * @param z third vector component value
+	 */
+	simd_f32_4(float x, float y, float z) noexcept
+	{
+		#if defined(MATH_SIMD_SUPPORT_SSE)
+		data = _mm_set_ps(z, z, y, x);
+		#elif defined(MATH_SIMD_SUPPORT_NEON)
+		data = { x, y, z, z };
+		#else
+		floats = float4(x, y, z, z);
 		#endif
 	}
 	/**
@@ -96,11 +116,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#if defined(MATH_SIMD_SUPPORT_SSE4_1)
 		data = _mm_blend_ps(xyz.data, _mm_set1_ps(w), 8);
 		#elif defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_set_ps(w, xyz.vec.z, xyz.vec.y, xyz.vec.x);
+		data = _mm_set_ps(w, xyz.floats.z, xyz.floats.y, xyz.floats.x);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vsetq_lane_f32(w, xyz.data, 3);
 		#else
-		vec = xyz.vec; vec.w = w;
+		floats = xyz.floats; floats.w = w;
 		#endif
 	}
 
@@ -114,30 +134,30 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 
 	/**
 	 * @brief Creates a new SIMD 4 component 32bit floating point vector structure.
-	 * @param data target vector unsigned integer SIMD data
+	 * @param v target vector unsigned integer SIMD data
 	 */
 	explicit simd_f32_4(simd_u32_4 v)
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		this->data = _mm_cvtepi32_ps(v.data);
+		data = _mm_cvtepi32_ps(v.data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		this->data = vcvtq_f32_u32(v.data);
+		data = vcvtq_f32_u32(v.data);
 		#else
-		vec = v.vec;
+		floats = v.floats;
 		#endif
 	}
 	/**
 	 * @brief Creates a new SIMD 4 component 32bit floating point vector structure.
-	 * @param data target vector signed integer SIMD data
+	 * @param v target vector signed integer SIMD data
 	 */
 	explicit simd_f32_4(simd_i32_4 v)
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		this->data = _mm_cvtepi32_ps(v.data);
+		data = _mm_cvtepi32_ps(v.data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		this->data = vcvtq_f32_s32(v.data);
+		data = vcvtq_f32_s32(v.data);
 		#else
-		vec = v.vec;
+		floats = v.floats;
 		#endif
 	}
 
@@ -152,7 +172,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.w };
 		#else
-		vec = v;
+		floats = v;
 		#endif
 	}
 	/**
@@ -166,7 +186,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.z };
 		#else
-		vec = float4(v, v.z);
+		floats = float4(v, v.z);
 		#endif
 	}
 	/**
@@ -180,7 +200,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.y, v.y };
 		#else
-		vec = float4(v, v.y, v.y);
+		floats = float4(v, v.y, v.y);
 		#endif
 	}
 	/**
@@ -194,7 +214,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vld1q_f32((const float*)v);
 		#else
-		vec = *v;
+		floats = *v;
 		#endif
 	}
 
@@ -208,7 +228,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcvtq_u32_f32(data);
 		#else
-		return simd_u32_4((uint4)vec);
+		return simd_u32_4((uint4)floats);
 		#endif
 	}
 	/**
@@ -221,107 +241,53 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcvtq_s32_f32(data);
 		#else
-		return simd_i32_4((uint4)vec);
-		#endif
-	}
-
-	/*******************************************************************************************************************
-	 * @brief Casts as 4 component unsigned integer SIMD vector. (Doesn't change the bits)
-	 */
-	simd_u32_4 castToUint() const noexcept
-	{
-		#if defined(MATH_SIMD_SUPPORT_SSE)
-		return _mm_castps_si128(data);
-		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		return vreinterpretq_u32_f32(data);
-		#else
-		return *((const simd_u32_4*)this);
-		#endif
-	}
-	/**
-	 * @brief Casts as 4 component signed integer SIMD vector. (Doesn't change the bits)
-	 */
-	simd_i32_4 castToInt() const noexcept
-	{
-		#if defined(MATH_SIMD_SUPPORT_SSE)
-		return _mm_castps_si128(data);
-		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		return vreinterpretq_us32_f32(data);
-		#else
-		return *((const simd_i32_4*)this);
-		#endif
-	}
-
-	/**
-	 * @brief Casts from the 4 component unsigned integer SIMD vector. (Doesn't change the bits)
-	 */
-	void castFrom(simd_u32_4 v) noexcept
-	{
-		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_castsi128_ps(v.data);
-		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vreinterpretq_f32_u32(v.data);
-		#else
-		*this = *((const simd_f32_4*)&v);
-		#endif
-	}
-	/**
-	 * @brief Casts from the 4 component signed integer SIMD vector. (Doesn't change the bits)
-	 */
-	void castFrom(simd_i32_4 v) noexcept
-	{
-		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_castsi128_ps(v.data);
-		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vreinterpretq_f32_s32(v.data);
-		#else
-		*this = *((const simd_f32_4*)&v);
+		return simd_i32_4((uint4)floats);
 		#endif
 	}
 	
 	/*******************************************************************************************************************
-	 * @brief Loads SIMD 4 component 32bit floating point aligned vector value.
+	 * @brief Loads SIMD 4 component 32bit floating point aligned vector values.
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[in] v target 4 component vector value pointer (aligned)
 	 */
-	void loadAligned(const float4* v) noexcept
+	void loadAligned(const float* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_load_ps((const float*)v);
+		data = _mm_load_ps(v);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vld1q_f32((const float*)v);
+		data = vld1q_f32(v);
 		#else
-		vec = *v;
+		floats = *((const float4*)v);
 		#endif
 	}
 
 	/**
-	 * @brief Stores SIMD 4 component 32bit floating point unaligned vector value.
+	 * @brief Stores SIMD 4 component 32bit floating point unaligned vector values.
 	 * @param[out] v target 4 component vector value pointer (unaligned)
 	 */
-	void store(float4* v) noexcept
+	void store(float* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		_mm_storeu_ps((float*)v, data);
+		_mm_storeu_ps(v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		vst1q_f32((float*)v, data);
+		vst1q_f32(v, data);
 		#else
-		*v = vec;
+		*((float4*)v) = floats;
 		#endif
 	}
 	/**
-	 * @brief Stores SIMD 4 component 32bit floating point aligned vector value.
+	 * @brief Stores SIMD 4 component 32bit floating point aligned vector values.
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[out] v target 4 component vector value pointer (aligned)
 	 */
-	void storeAligned(float4* v) noexcept
+	void storeAligned(float* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		_mm_store_ps((float*)v, data);
+		_mm_store_ps(v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		vst1q_f32((float*)v, data);
+		vst1q_f32(v, data);
 		#else
-		*v = vec;
+		*((float4*)v) = floats;
 		#endif
 	}
 
@@ -335,7 +301,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_f32(data, 0);
 		#else
-		return vec.x;
+		return floats.x;
 		#endif
 	}
 	/**
@@ -346,7 +312,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_f32(data, 1);
 		#else
-		return vec.y;
+		return floats.y;
 		#endif
 	}
 	/**
@@ -357,7 +323,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_f32(data, 2);
 		#else
-		return vec.z;
+		return floats.z;
 		#endif
 	}
 	/**
@@ -368,7 +334,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_f32(data, 3);
 		#else
-		return vec.w;
+		return floats.w;
 		#endif
 	}
 
@@ -376,22 +342,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 	 * @brief Sets SIMD vector first component value.
 	 * @param value target X vector component value
 	 */
-	void setX(float value) noexcept { vec.x = value; }
+	void setX(float value) noexcept { floats.x = value; }
 	/**
 	 * @brief Sets SIMD vector second component value.
 	 * @param value target Y vector component value
 	 */
-	void setY(float value) noexcept { vec.y = value; }
+	void setY(float value) noexcept { floats.y = value; }
 	/**
 	 * @brief Sets SIMD vector third component value.
 	 * @param value target Z vector component value
 	 */
-	void setZ(float value) noexcept { vec.z = value; }
+	void setZ(float value) noexcept { floats.z = value; }
 	/**
 	 * @brief Sets SIMD vector fourth component value.
 	 * @param value target W vector component value
 	 */
-	void setW(float value) noexcept { vec.w = value; }
+	void setW(float value) noexcept { floats.w = value; }
 
 	/**
 	 * @brief Swizzles SIMD vector components.
@@ -414,7 +380,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return __builtin_shufflevector(data, data, X, Y, Z, W);
 		#else
-		return simd_f32_4(vec[X], vec[Y], vec[Z], vec[W]);
+		return simd_f32_4(floats[X], floats[Y], floats[Z], floats[W]);
 		#endif
 	}
 
@@ -427,25 +393,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	float& operator[](psize i) noexcept { return vec[i]; }
+	float& operator[](psize i) noexcept { return floats[i]; }
 	/**
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	float operator[](psize i) const noexcept { return vec[i]; }
+	float operator[](psize i) const noexcept { return floats[i]; }
 
 	/**
 	 * @brief Returns SIMD vector as 4 component floating point vector. (xyzw)
 	 */
-	explicit operator float4() const noexcept { return vec; }
+	explicit operator float4() const noexcept { return floats; }
 	/**
 	 * @brief Returns SIMD vector as 3 component floating point vector. (xyz)
 	 */
-	explicit operator float3() const noexcept { return (float3)vec; }
+	explicit operator float3() const noexcept { return (float3)floats; }
 	/**
 	 * @brief Returns SIMD vector as 2 component floating point vector. (xy)
 	 */
-	explicit operator float2() const noexcept { return (float2)vec; }
+	explicit operator float2() const noexcept { return (float2)floats; }
 	/**
 	 * @brief Returns SIMD first vector component value. (x)
 	 */
@@ -459,7 +425,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vaddq_f32(data, v.data);
 		#else
-		return simd_f32_4(vec + v.vec);
+		return simd_f32_4(floats + v.floats);
 		#endif
 	}
 	simd_f32_4 operator-(simd_f32_4 v) const noexcept
@@ -469,7 +435,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vsubq_f32(data, v.data);
 		#else
-		return simd_f32_4(vec - v.vec);
+		return simd_f32_4(floats - v.floats);
 		#endif
 	}
 	simd_f32_4 operator*(simd_f32_4 v) const noexcept
@@ -479,7 +445,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vmulq_f32(data, v.data);
 		#else
-		return simd_f32_4(vec * v.vec);
+		return simd_f32_4(floats * v.floats);
 		#endif
 	}
 	simd_f32_4 operator/(simd_f32_4 v) const noexcept
@@ -489,7 +455,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vdivq_f32(data, v.data);
 		#else
-		return simd_f32_4(vec / v.vec);
+		return simd_f32_4(floats / v.floats);
 		#endif
 	}
 	simd_f32_4 operator-() const noexcept
@@ -499,18 +465,29 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vnegq_f32(data);
 		#else
-		return simd_f32_4(-vec);
+		return simd_f32_4(-floats);
+		#endif
+	}
+	simd_f32_4 operator+(float n) const noexcept { return *this + simd_f32_4(n); }
+	simd_f32_4 operator-(float n) const noexcept { return *this - simd_f32_4(n); }
+	simd_f32_4 operator*(float n) const noexcept
+	{
+		#if defined(MATH_SIMD_SUPPORT_NEON)
+		return vmulq_n_f32(data, n); // NEON vector-by-scalar multiply, the scalar operand is n.
+		#else
+		return *this * simd_f32_4(n);
 		#endif
 	}
+	simd_f32_4 operator/(float n) const noexcept { return *this / simd_f32_4(n); }
 
 	simd_f32_4& operator+=(simd_f32_4 v) noexcept { *this = *this + v; return *this; }
 	simd_f32_4& operator-=(simd_f32_4 v) noexcept { *this = *this - v; return *this; }
 	simd_f32_4& operator*=(simd_f32_4 v) noexcept { *this = *this * v; return *this; }
 	simd_f32_4& operator/=(simd_f32_4 v) noexcept { *this = *this / v; return *this; }
-	simd_f32_4& operator+=(float n) noexcept { *this = *this + simd_f32_4(n); return *this; }
-	simd_f32_4& operator-=(float n) noexcept { *this = *this - simd_f32_4(n); return *this; }
-	simd_f32_4& operator*=(float n) noexcept { *this = *this * simd_f32_4(n); return *this; }
-	simd_f32_4& operator/=(float n) noexcept { *this = *this / simd_f32_4(n); return *this; }
+	simd_f32_4& operator+=(float n) noexcept { *this = *this + n; return *this; }
+	simd_f32_4& operator-=(float n) noexcept { *this = *this - n; return *this; }
+	simd_f32_4& operator*=(float n) noexcept { *this = *this * n; return *this; }
+	simd_f32_4& operator/=(float n) noexcept { *this = *this / n; return *this; }
 	simd_f32_4& operator=(float n) noexcept { *this = simd_f32_4(n); return *this; }
 
 	//******************************************************************************************************************
@@ -521,7 +498,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vminvq_u32(vceqq_f32(data, v.data)) == 0xFFFFFFFFu;
 		#else
-		return vec == v.vec;
+		return floats == v.floats;
 		#endif
 	}
 	bool operator!=(simd_f32_4 v) const noexcept
@@ -531,7 +508,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vminvq_u32(vceqq_f32(data, v.data)) == 0u;
 		#else
-		return vec != v.vec;
+		return floats != v.floats;
 		#endif
 	}
 	simd_u32_4 operator<(simd_f32_4 v) const noexcept
@@ -541,7 +518,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcltq_f32(data, v.data);
 		#else
-		return simd_u32_4(vec < v.vec);
+		return simd_u32_4(floats < v.floats);
 		#endif
 	}
 	simd_u32_4 operator>(simd_f32_4 v) const noexcept
@@ -551,7 +528,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgtq_f32(data, v.data);
 		#else
-		return simd_u32_4(vec > v.vec);
+		return simd_u32_4(floats > v.floats);
 		#endif
 	}
 	simd_u32_4 operator<=(simd_f32_4 v) const noexcept
@@ -561,7 +538,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcleq_f32(data, v.data);
 		#else
-		return simd_u32_4(vec <= v.vec);
+		return simd_u32_4(floats <= v.floats);
 		#endif
 	}
 	simd_u32_4 operator>=(simd_f32_4 v) const noexcept
@@ -571,23 +548,97 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_f32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgeq_f32(data, v.data);
 		#else
-		return simd_u32_4(vec >= v.vec);
+		return simd_u32_4(floats >= v.floats);
 		#endif
 	}
 
-	static const simd_f32_4 zero, one, minusOne, inf, minusInf, nan;
+	bool operator==(float n) const noexcept { return *this == simd_f32_4(n); }
+	bool operator!=(float n) const noexcept { return *this != simd_f32_4(n); }
+	simd_u32_4 operator<(float n) const noexcept { return *this < simd_f32_4(n); }
+	simd_u32_4 operator>(float n) const noexcept { return *this > simd_f32_4(n); }
+	simd_u32_4 operator<=(float n) const noexcept { return *this <= simd_f32_4(n); }
+	simd_u32_4 operator>=(float n) const noexcept { return *this >= simd_f32_4(n); }
+
+	static const simd_f32_4 zero, one, minusOne, inf, minusInf, nan, min, minusMin, max, minusMax, epsilon;
 };
 
+//**********************************************************************************************************************
 inline const simd_f32_4 simd_f32_4::zero = simd_f32_4(0.0f);
 inline const simd_f32_4 simd_f32_4::one = simd_f32_4(1.0f);
 inline const simd_f32_4 simd_f32_4::minusOne = simd_f32_4(-1.0f);
 inline const simd_f32_4 simd_f32_4::inf = simd_f32_4(INFINITY);
 inline const simd_f32_4 simd_f32_4::minusInf = simd_f32_4(-INFINITY);
 inline const simd_f32_4 simd_f32_4::nan = simd_f32_4(NAN);
+inline const simd_f32_4 simd_f32_4::min = simd_f32_4(FLT_MIN);
+inline const simd_f32_4 simd_f32_4::minusMin = simd_f32_4(-FLT_MIN);
+inline const simd_f32_4 simd_f32_4::max = simd_f32_4(FLT_MAX);
+inline const simd_f32_4 simd_f32_4::minusMax = simd_f32_4(-FLT_MAX);
+inline const simd_f32_4 simd_f32_4::epsilon = simd_f32_4(FLT_EPSILON);
+
+/**
+ * @brief Reinterprets 4 component unsigned integer SIMD vector as floating point one. (Doesn't change the bits)
+ */
+static simd_f32_4 uintAsFloat(simd_u32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_castsi128_ps(v.data);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vreinterpretq_f32_u32(v.data);
+	#else
+	return *((const simd_f32_4*)&v); // Scalar fallback: reads the same storage through a floating point vector pointer.
+	#endif
+}
+/**
+ * @brief Reinterprets 4 component signed integer SIMD vector as floating point one. (Doesn't change the bits)
+ */
+static simd_f32_4 intAsFloat(simd_i32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_castsi128_ps(v.data);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vreinterpretq_f32_s32(v.data);
+	#else
+	return *((const simd_f32_4*)&v); // Scalar fallback: reads the same storage through a floating point vector pointer.
+	#endif
+}
+
+/**
+ * @brief Reinterprets 4 component floating point SIMD vector as unsigned integer one. (Doesn't change the bits)
+ */
+static simd_u32_4 floatAsUint(simd_f32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_castps_si128(v.data);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vreinterpretq_u32_f32(v.data); // Free function: the register must be taken from the argument.
+	#else
+	return *((const simd_u32_4*)&v);
+	#endif
+}
+/**
+ * @brief Reinterprets 4 component floating point SIMD vector as signed integer one. (Doesn't change the bits)
+ */
+static simd_i32_4 floatAsInt(simd_f32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_castps_si128(v.data);
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vreinterpretq_s32_f32(v.data);
+	#else
+	return *((const simd_i32_4*)&v); // Scalar fallback: reads the same storage through a signed integer vector pointer.
+	#endif
+}
 
 static simd_f32_4 operator+(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) + v; }
 static simd_f32_4 operator-(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) - v; }
-static simd_f32_4 operator*(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) * v; }
+static simd_f32_4 operator*(float n, simd_f32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_NEON)
+	return vmulq_n_f32(v.data, n); // NEON vector-by-scalar multiply.
+	#else
+	return simd_f32_4(n) * v; // Broadcasts n to all components, then multiplies component wise.
+	#endif
+}
 static simd_f32_4 operator/(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) / v; }
 static bool operator==(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) == v; }
 static bool operator!=(float n, simd_f32_4 v) noexcept { return simd_f32_4(n) != v; }
@@ -602,16 +653,33 @@ static simd_u32_4 operator>=(float n, simd_f32_4 v) noexcept { return simd_f32_4
  * @param a first SIMD vector to compare
  * @param b second SIMD vector to compare
  */
-static simd_u32_4 compare(simd_f32_4 a, simd_f32_4 b) noexcept
+static simd_u32_4 equal(simd_f32_4 a, simd_f32_4 b) noexcept
 {
 	#if defined(MATH_SIMD_SUPPORT_SSE)
-	return simd_f32_4(_mm_cmpeq_ps(a.data, b.data)).castToUint();
+	return _mm_castps_si128(_mm_cmpeq_ps(a.data, b.data));
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vceqq_f32(a.data, b.data);
 	#else
-	return simd_u32_4(compare(a.vec, b.vec));
+	return simd_u32_4(equal(a.floats, b.floats));
 	#endif
 }
+/**
+ * @brief Compares two SIMD vectors component wise for inequality. (Result is an unsigned integer SIMD vector)
+ *
+ * @param a first SIMD vector to compare
+ * @param b second SIMD vector to compare
+ */
+static simd_u32_4 notEqual(simd_f32_4 a, simd_f32_4 b) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_castps_si128(_mm_cmpneq_ps(a.data, b.data));
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vmvnq_u32(vceqq_f32(a.data, b.data)); // Bitwise NOT of the equality comparison result.
+	#else
+	return simd_u32_4(notEqual(a.floats, b.floats));
+	#endif
+}
+
 /**
  * @brief Returns true if first SIMD vector binary representation is less than the second.
  *
@@ -637,7 +705,7 @@ static simd_f32_4 select(simd_u32_4 c, simd_f32_4 t, simd_f32_4 f) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(c.data), 31)), t.data, f.data);
 	#else
-	return simd_f32_4(select(c.vec, t.vec, f.vec));
+	return simd_f32_4(select(c.floats, t.floats, f.floats));
 	#endif
 }
 
@@ -654,7 +722,7 @@ static simd_f32_4 min(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vminq_f32(a.data, b.data);
 	#else
-	return simd_f32_4(min(a.vec, b.vec));
+	return simd_f32_4(min(a.floats, b.floats));
 	#endif
 }
 /**
@@ -670,31 +738,52 @@ static simd_f32_4 max(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vmaxq_f32(a.data, b.data);
 	#else
-	return simd_f32_4(max(a.vec, b.vec));
+	return simd_f32_4(max(a.floats, b.floats));
 	#endif
 }
 
 /**
- * @brief Returns minimum value of all SIMD vector components.
+ * @brief Returns minimum value of 4D SIMD vector components.
  * @param v target SIMD vector
  */
-static float min(simd_f32_4 v) noexcept
+static float min4(simd_f32_4 v) noexcept
 {
 	auto m = min(v, v.swizzle<SwY, SwU, SwW, SwU>());
 	m = min(m, m.swizzle<SwZ, SwU, SwU, SwU>());
 	return m.getX();
 }
 /**
- * @brief Returns maximum value of all SIMD vector components.
+ * @brief Returns maximum value of 4D SIMD vector components.
  * @param v target SIMD vector
  */
-static float max(simd_f32_4 v) noexcept
+static float max4(simd_f32_4 v) noexcept
 {
 	auto m = max(v, v.swizzle<SwY, SwU, SwW, SwU>());
 	m = max(m, m.swizzle<SwZ, SwU, SwU, SwU>());
 	return m.getX();
 }
 
+/**
+ * @brief Finds the smallest of the X, Y and Z SIMD vector components.
+ * @param v target SIMD vector
+ */
+static float min3(simd_f32_4 v) noexcept
+{
+	auto lanes = min(v, v.swizzle<SwY, SwU, SwZ, SwU>());
+	lanes = min(lanes, lanes.swizzle<SwZ, SwU, SwU, SwU>());
+	return lanes.getX();
+}
+/**
+ * @brief Finds the largest of the X, Y and Z SIMD vector components.
+ * @param v target SIMD vector
+ */
+static float max3(simd_f32_4 v) noexcept
+{
+	auto lanes = max(v, v.swizzle<SwY, SwU, SwZ, SwU>());
+	lanes = max(lanes, lanes.swizzle<SwZ, SwU, SwU, SwU>());
+	return lanes.getX();
+}
+
 /**
  * @brief Clamps SIMD vector components between min and max values.
  * 
@@ -723,7 +812,7 @@ static simd_f32_4 fma(simd_f32_4 mul1, simd_f32_4 mul2, simd_f32_4 add) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vmlaq_f32(add.data, mul1.data, mul2.data);
 	#else
-	return simd_f32_4(fma(mul1.vec, mul2.vec, add.vec));
+	return simd_f32_4(fma(mul1.floats, mul2.floats, add.floats));
 	#endif
 }
 /**
@@ -733,11 +822,14 @@ static simd_f32_4 fma(simd_f32_4 mul1, simd_f32_4 mul2, simd_f32_4 add) noexcept
 static simd_f32_4 abs(simd_f32_4 v) noexcept
 {
 	#if defined(MATH_SIMD_SUPPORT_SSE)
-	return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), v.data), v.data);
+	if constexpr (std::numeric_limits<float>::is_iec559)
+		return _mm_and_ps(v.data, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
+	else
+		return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), v.data), v.data);
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vabsq_f32(v.data);
 	#else
-	return simd_f32_4(abs(v.vec));
+	return simd_f32_4(abs(v.floats));
 	#endif
 }
 /**
@@ -751,7 +843,7 @@ static simd_f32_4 sqrt(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vsqrtq_f32(v.data);
 	#else
-	return simd_f32_4(sqrt(v.vec));
+	return simd_f32_4(sqrt(v.floats));
 	#endif
 }
 
@@ -767,7 +859,7 @@ static simd_f32_4 sign(simd_f32_4 v) noexcept
 	return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(v.data), 
 		vreinterpretq_u32_f32(vdupq_n_f32(-1.0f))), vreinterpretq_u32_f32(vdupq_n_f32(1.0f))));
 	#else
-	return simd_f32_4(sign(v.vec));
+	return simd_f32_4(sign(v.floats));
 	#endif
 }
 /**
@@ -782,7 +874,7 @@ static int signBits(simd_f32_4 v) noexcept
 	auto shift = { 0, 1, 2, 3 };
 	return vaddvq_u32(vshlq_u32(vshrq_n_u32(vreinterpretq_u32_f32(v.data), 31), shift));
 	#else
-	return signBits(v.vec);
+	return signBits(v.floats);
 	#endif
 }
 
@@ -797,7 +889,7 @@ static simd_f32_4 round(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vrndnq_f32(v.data);
 	#else
-	return simd_f32_4(round(v.vec));
+	return simd_f32_4(round(v.floats));
 	#endif
 }
 /**
@@ -811,7 +903,7 @@ static simd_f32_4 floor(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vrndmq_f32(v.data);
 	#else
-	return simd_f32_4(floor(v.vec));
+	return simd_f32_4(floor(v.floats));
 	#endif
 }
 /**
@@ -825,7 +917,7 @@ static simd_f32_4 ceil(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vrndpq_f32(v.data);
 	#else
-	return simd_f32_4(ceil(v.vec));
+	return simd_f32_4(ceil(v.floats));
 	#endif
 }
 /**
@@ -839,12 +931,10 @@ static simd_f32_4 trunc(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vrndq_f32(v.data);
 	#else
-	return simd_f32_4(trunc(v.vec));
+	return simd_f32_4(trunc(v.floats));
 	#endif
 }
 
-// TODO: implement or use libs with fast sin, cos, pow, etc. functions.
-
 /***********************************************************************************************************************
  * @brief Returns dot product between two 4D SIMD vector.
  * 
@@ -858,7 +948,7 @@ static simd_f32_4 dotV4(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vdupq_n_f32(vaddvq_f32(vmulq_f32(a.data, b.data)));
 	#else
-	return simd_f32_4(dot(a.vec, b.vec));
+	return simd_f32_4(dot(a.floats, b.floats));
 	#endif
 }
 /**
@@ -874,7 +964,7 @@ static float dot4(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vaddvq_f32(vmulq_f32(a.data, b.data));
 	#else
-	return dot(a.vec, b.vec);
+	return dot(a.floats, b.floats);
 	#endif
 }
 
@@ -891,7 +981,7 @@ static simd_f32_4 dotV3(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vdupq_n_f32(vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(a.data, b.data), 3)));
 	#else
-	return simd_f32_4(dot((float3)a.vec, (float3)b.vec));
+	return simd_f32_4(dot((float3)a.floats, (float3)b.floats));
 	#endif
 }
 /**
@@ -907,7 +997,7 @@ static float dot3(simd_f32_4 a, simd_f32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(a.data, b.data), 3));
 	#else
-	return dot((float3)a.vec, (float3)b.vec);
+	return dot((float3)a.floats, (float3)b.floats);
 	#endif
 }
 
@@ -930,8 +1020,8 @@ static simd_f32_4 cross3(simd_f32_4 a, simd_f32_4 b) noexcept
 		vmulq_f32(__builtin_shufflevector(a.data, a.data, 1, 2, 0, 0), b.data));
 	return __builtin_shufflevector(r, r, 1, 2, 0, 0);
 	#else
-	auto r = cross((float3)a.vec, (float3)b.vec);
-	return simd_f32_4(r.x, r.y, r.z, r.z);
+	auto r = cross((float3)a.floats, (float3)b.floats);
+	return simd_f32_4(r.x, r.y, r.z); // TODO: not sure if this is the same result as SIMD, check this.
 	#endif
 }
 
@@ -951,7 +1041,7 @@ static float length4(simd_f32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vget_lane_f32(vsqrt_f32(vdup_n_f32(vaddvq_f32(vmulq_f32(v.data, v.data)))), 0);
 	#else
-	return length(v.vec);
+	return length(v.floats);
 	#endif
 }
 
@@ -967,11 +1057,11 @@ static float lengthSq3(simd_f32_4 v) noexcept { return dot3(v, v); }
 static float length3(simd_f32_4 v) noexcept
 {
 	#if defined(MATH_SIMD_SUPPORT_SSE4_1)
-	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.data, v.data, 0xff)));
+	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.data, v.data, 0x7f)));
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vget_lane_f32(vsqrt_f32(vdup_n_f32(vaddvq_f32(vmulq_f32(v.data, v.data)))), 0);
 	#else
-	return length((float3)v.vec);
+	return length((float3)v.floats);
 	#endif
 }
 
@@ -1031,9 +1121,230 @@ static simd_f32_4 normalize3(simd_f32_4 v) noexcept
 	return vdivq_f32(v.data, vsqrtq_f32(vdupq_n_f32(
 		vaddvq_f32(vsetq_lane_f32(0, vmulq_f32(v.data, v.data), 3)))));
 	#else
-	auto r = normalize((float3)v.vec);
-	return simd_f32_4(r.x, r.y, r.z, r.z);
+	auto r = normalize((float3)v.floats);
+	return simd_f32_4(r.x, r.y, r.z);
 	#endif
 }
 
+/**
+ * @brief Returns true if 4D SIMD vector squared length is equal to 1.0 within the specified tolerance.
+ * 
+ * @param v target SIMD vector to check
+ * @param tolerance maximum allowed absolute deviation of the squared length from 1.0
+ */
+static bool isNormalized4(simd_f32_4 v, float tolerance = 1.0e-6f) noexcept
+{
+	return std::abs(lengthSq4(v) - 1.0f) <= tolerance;
+}
+/**
+ * @brief Returns true if 3D SIMD vector squared length is equal to 1.0 within the specified tolerance.
+ * 
+ * @param v target SIMD vector to check
+ * @param tolerance maximum allowed absolute deviation of the squared length from 1.0
+ */
+static bool isNormalized3(simd_f32_4 v, float tolerance = 1.0e-6f) noexcept
+{
+	return std::abs(lengthSq3(v) - 1.0f) <= tolerance;
+}
+
+/***********************************************************************************************************************
+ * @brief Returns floor based float remainder of numer/denom for each element of the SIMD vector. [r = n - floor(n/d) * d]
+ * 
+ * @param n quotient numerator SIMD vector
+ * @param d quotient denominator SIMD vector
+ */
+static simd_f32_4 mod(simd_f32_4 n, simd_f32_4 d) noexcept
+{
+    return n - floor(n / d) * d;
+}
+
+/**
+ * @brief Returns natural logarithm for each element of the SIMD vector. [r = inv(e^x)]
+ * @param v target SIMD vector (+inf gives +inf, zero gives -inf, negative or NaN gives NaN)
+ */
+static simd_f32_4 log(simd_f32_4 v) noexcept
+{
+	if constexpr (std::numeric_limits<float>::is_iec559)
+	{
+		// Based on sleefsimdsp.c from the Sleef library.
+		auto o = v < simd_f32_4::min;
+		v = select(o, v * ((float)((int64)1 << 32) * (float)((int64)1 << 32)), v); // Scale by 2^64.
+
+		auto e = ((floatAsUint(v * (1.0f / 0.75f)) >> 23u) & 0xFFu) - 0x7Fu;
+		auto m = intAsFloat(floatAsInt(v) + ((-uintAsInt(e)) << 23));
+		e = select(o, e - 64, e); // Undo the 2^64 scale in the exponent.
+
+		auto x = (m - simd_f32_4::one) / (m + simd_f32_4::one);
+		auto x2 = x * x;
+
+		auto t = simd_f32_4(0.2392828464508056640625f);
+		t = fma(t, x2, simd_f32_4(0.28518211841583251953125f));
+		t = fma(t, x2, simd_f32_4(0.400005877017974853515625f));
+		t = fma(t, x2, simd_f32_4(0.666666686534881591796875f));
+		t = fma(t, x2, simd_f32_4(2.0f));
+
+		// The exponent is a signed value kept in unsigned lanes, so convert it to float as signed.
+		x = fma(x, t, simd_f32_4(0.693147180559945286226764f) * (simd_f32_4)uintAsInt(e));
+		x = select(equal(v, simd_f32_4::inf), simd_f32_4::inf, x); // log(+inf) = +inf
+		x = select(v >= 0.0f, x, simd_f32_4::nan); // Negative and NaN inputs fail this test.
+		x = select(equal(v, simd_f32_4::zero), simd_f32_4::minusInf, x); // log(0) = -inf
+		return x;
+	}
+	else
+	{
+		return simd_f32_4(std::log(v.getX()), std::log(v.getY()), 
+			std::log(v.getZ()), std::log(v.getW()));
+	}
+}
+/**
+ * @brief Returns base-e exponential function for each element of the SIMD vector. [r = e^x]
+ * @param v target SIMD vector
+ */
+static simd_f32_4 exp(simd_f32_4 v) noexcept
+{
+	if constexpr (std::numeric_limits<float>::is_iec559)
+	{
+		// Based on sleefsimdsp.c from the Sleef library.
+		auto s = v * M_LOG2E;
+		auto q = (simd_i32_4)select(s < 0.0f, s - 0.5f, s + 0.5f);
+		auto qFloat = (simd_f32_4)q;
+		s = fma(qFloat, simd_f32_4(-0.693145751953125f), v);
+		s = fma(qFloat, simd_f32_4(-1.428606765330187045e-06f), s);
+
+		auto u = simd_f32_4(0.000198527617612853646278381f);
+		u = fma(u, s, simd_f32_4(0.00139304355252534151077271f));
+		u = fma(u, s, simd_f32_4(0.00833336077630519866943359f));
+		u = fma(u, s, simd_f32_4(0.0416664853692054748535156f));
+		u = fma(u, s, simd_f32_4(0.166666671633720397949219f));
+		u = fma(u, s, simd_f32_4(0.5f));
+  
+		u = fma(s * s, u, s) + simd_f32_4::one;
+		u = u * intAsFloat(((q >> 1) + 0x7f) << 23) * 
+			intAsFloat(((q - (q >> 1)) + 0x7f) << 23);
+
+		u = uintAsFloat(floatAsUint(u) & (v >= -104.0f));
+		u = select(v > 104.0f, simd_f32_4::inf, u);
+		return u;
+	}
+	else
+	{
+		return simd_f32_4(std::exp(v.getX()), std::exp(v.getY()), 
+			std::exp(v.getZ()), std::exp(v.getW()));
+	}
+}
+
+/***********************************************************************************************************************
+ * @brief Returns base b raised to the power e for each element of the SIMD vector.
+ * 
+ * @param b SIMD vector with base values
+ * @param e SIMD vector with exponent values
+ */
+static simd_f32_4 pow(simd_f32_4 b, simd_f32_4 e) noexcept
+{
+	// TODO: use SIMD implementation from the sleefsimdsp.c
+	return simd_f32_4(std::pow(b.getX(), e.getX()), std::pow(b.getY(), e.getY()),
+		std::pow(b.getZ(), e.getZ()), std::pow(b.getW(), e.getW()));
+}
+/**
+ * @brief Returns base b raised to the power e for each element of the SIMD vector. (Fast approximation)
+ * @warning It has lower precision compared to the pow() function!
+ * 
+ * @param b SIMD vector with base values
+ * @param e SIMD vector with exponent values
+ */
+static simd_f32_4 fastPow(simd_f32_4 b, simd_f32_4 e) noexcept
+{
+	if constexpr (std::numeric_limits<float>::is_iec559)
+	{
+		// Based on sleefsimdsp.c from the Sleef library.
+		auto r = exp(log(abs(b)) * e);
+		auto eInt = (simd_i32_4)e; auto eAbs = abs(e);
+		auto eIsInt = equal(e, (simd_f32_4)eInt) | (eAbs >= ((int64)1 << 24));
+		auto eIsOdd = notEqual(eInt & 1, simd_i32_4::zero) & eIsInt & (eAbs < ((int64)1 << 24));
+		r *= select((b < simd_f32_4::zero) & eIsOdd, simd_f32_4::minusOne, simd_f32_4::one);
+		r = uintAsFloat(bic(floatAsUint(r), equal(b, simd_f32_4::zero)));
+		r = select(equal(e, simd_f32_4::zero), simd_f32_4::one, r);
+		return r;
+	}
+	else
+	{
+		return pow(b, e);
+	}
+}
+
+/***********************************************************************************************************************
+ * @brief Calculates sine and cosine for each element of the SIMD vector.
+ * 
+ * @param v target SIMD vector to get sin and cos from
+ * @param[out] sin SIMD vector with sine values
+ * @param[out] cos SIMD vector with cosine values
+ */
+static void sinCos(simd_f32_4 v, simd_f32_4& sin, simd_f32_4& cos) noexcept
+{
+	if constexpr (std::numeric_limits<float>::is_iec559)
+	{
+		// Based on sinf.c from the cephes library
+		auto sinSign = floatAsUint(v) & 0x80000000u; // Sign bit of the input, only sine depends on it.
+		auto x = uintAsFloat(floatAsUint(v) ^ sinSign); // Clears the sign bit, x = |v|.
+		auto quadrant = (simd_u32_4)fma(simd_f32_4(M_2_PI), x, simd_f32_4(0.5f)); // x * M_2_PI + 0.5 as integer.
+		auto floatQuadrant = (simd_f32_4)quadrant;
+		x = ((x - floatQuadrant * 1.5703125f) - floatQuadrant * // Subtracts quadrant * ~PI/2 in three parts.
+			0.0004837512969970703125f) - floatQuadrant * 7.549789948768648e-8f;
+	
+		auto x2 = x * x;
+		auto taylorCos = fma((2.443315711809948e-5f * x2 - 1.388731625493765e-3f), x2, 
+			simd_f32_4(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + simd_f32_4::one;
+		auto taylorSin = (fma(simd_f32_4(-1.9515295891e-4f), x2, 
+			simd_f32_4(8.3321608736e-3f)) * x2 - 1.6666654611e-1f) * x2 * x + x;
+
+		auto bit1 = quadrant << 31u, bit2 = (quadrant << 30u) & 0x80000000u; // Two lowest quadrant bits as sign bits.
+		auto s = select(bit1, taylorCos, taylorSin); // Odd quadrant swaps the sine and cosine polynomials.
+		auto c = select(bit1, taylorSin, taylorCos);
+
+		sinSign ^= bit2;
+		auto cosSign = bit1 ^ bit2;
+		sin = uintAsFloat(floatAsUint(s) ^ sinSign); // Flips the result sign where the sign bit is set.
+		cos = uintAsFloat(floatAsUint(c) ^ cosSign);
+	}
+	else
+	{
+		sin = simd_f32_4(std::sin(v.getX()), std::sin(v.getY()), 
+			std::sin(v.getZ()), std::sin(v.getW()));
+		cos = simd_f32_4(std::cos(v.getX()), std::cos(v.getY()), 
+			std::cos(v.getZ()), std::cos(v.getW()));
+	}
+}
+
+/***********************************************************************************************************************
+ * @brief Remaps each component of SIMD vector to the 0.0 - 1.0 range.
+ * @param v target SIMD vector
+ */
+static simd_f32_4 repeat(simd_f32_4 v) noexcept
+{
+	auto r = select(v < 0.0f, 1.0f - mod(-v, simd_f32_4::one), v);
+	return select(v < 1.0f, r, mod(v, simd_f32_4::one));
+}
+
+/**
+ * @brief Linearly interpolates each component of the SIMD vector between a and b using t.
+ * 
+ * @param a minimum SIMD vector (t == 0.0)
+ * @param b maximum SIMD vector (t == 1.0)
+ * @param t target interpolation value (0.0 - 1.0)
+ */
+static simd_f32_4 lerp(simd_f32_4 a, simd_f32_4 b, float t) noexcept { return a * (1.0f - t) + b * t; }
+/**
+ * @brief Linearly interpolates each component of the SIMD vector between a and b using f, taking into account delta time.
+ * @note Always use this function instead of basic lerp() when you have variable delta time!
+ * 
+ * @param a minimum SIMD vector (returned when the blend weight is 0.0)
+ * @param b maximum SIMD vector (returned when the blend weight is 1.0)
+ * @param f interpolation base value, the blend weight is computed as 1.0 - f^dt
+ * @param dt current delta time
+ */
+static simd_f32_4 lerpDelta(simd_f32_4 a, simd_f32_4 b, float f, float dt) noexcept
+{
+	return a + (1.0f - std::pow(f, dt)) * (b - a);
+}
+
 } // namespace math
\ No newline at end of file
diff --git a/include/math/simd/vector/int.hpp b/include/math/simd/vector/int.hpp
index f24bf8b..e1c3484 100644
--- a/include/math/simd/vector/int.hpp
+++ b/include/math/simd/vector/int.hpp
@@ -33,7 +33,9 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 	union
 	{
 		_simd_i128 data; /** Packed SIMD 4 component 32bit signed integer vector data. */
-		int4 vec;
+		int4 ints;
+		uint4 uints;
+		float4 floats;
 	};
 
 	simd_i32_4(const simd_i32_4& v) = default;
@@ -49,7 +51,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_s32(0);
 		#else
-		vec = int4::zero;
+		ints = int4::zero;
 		#endif
 	}
 	/**
@@ -63,7 +65,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_s32(xyzw);
 		#else
-		vec = int4(xyzw);
+		ints = int4(xyzw);
 		#endif
 	}
 	/**
@@ -81,7 +83,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { x, y, z, w };
 		#else
-		vec = int4(x, y, z, w);
+		ints = int4(x, y, z, w);
+		#endif
+	}
+	/**
+	 * @brief Creates a new SIMD 4 component 32bit signed integer vector structure.
+	 * @warning This constructor duplicates Z component to the W component!
+	 *
+	 * @param x first vector component value
+	 * @param y second vector component value
+	 * @param z third vector component value
+	 */
+	simd_i32_4(int32 x, int32 y, int32 z) noexcept
+	{
+		#if defined(MATH_SIMD_SUPPORT_SSE)
+		data = _mm_set_epi32((int)z, (int)z, (int)y, (int)x);
+		#elif defined(MATH_SIMD_SUPPORT_NEON)
+		data = { x, y, z, z };
+		#else
+		ints = int4(x, y, z, z);
 		#endif
 	}
 	/**
@@ -92,14 +112,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 	 */
 	simd_i32_4(simd_i32_4 xyz, int32 w) noexcept
 	{
-		#if defined(MATH_SIMD_SUPPORT_SSE4_1)
+		#if defined(MATH_SIMD_SUPPORT_AVX2)
 		data = _mm_blend_epi32(xyz.data, _mm_set1_epi32(w), 8);
 		#elif defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_set_epi32(w, xyz.vec.z, xyz.vec.y, xyz.vec.x);
+		data = _mm_set_epi32(w, xyz.ints.z, xyz.ints.y, xyz.ints.x);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vsetq_lane_u32(w, xyz.data, 3);
+		data = vsetq_lane_s32(w, xyz.data, 3);
 		#else
-		vec = xyz.vec; vec.w = w;
+		ints = xyz.ints; ints.w = w;
 		#endif
 	}
 
@@ -113,16 +133,16 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 
 	/**
 	 * @brief Creates a new SIMD 4 component 32bit floating point vector structure.
-	 * @param data target vector unsigned integer SIMD data
+	 * @param v target vector unsigned integer SIMD data
 	 */
 	explicit simd_i32_4(simd_u32_4 v)
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		this->data = v.data;
+		data = v.data;
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		this->data = v.data;
+		data = vreinterpretq_s32_u32(v.data);
 		#else
-		vec = v.vec;
+		ints = v.ints;
 		#endif
 	}
 
@@ -137,7 +157,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.w };
 		#else
-		vec = v;
+		ints = v;
 		#endif
 	}
 	/**
@@ -151,7 +171,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.z };
 		#else
-		vec = int4(v, v.z);
+		ints = int4(v, v.z);
 		#endif
 	}
 	/**
@@ -165,7 +185,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.y, v.y };
 		#else
-		vec = int4(v, v.y, v.y);
+		ints = int4(v, v.y, v.y);
 		#endif
 	}
 	/**
@@ -179,7 +199,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vld1q_s32((const int32*)v);
 		#else
-		vec = *v;
+		ints = *v;
 		#endif
 	}
 	
@@ -191,55 +211,55 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#if defined(MATH_SIMD_SUPPORT_SSE)
 		return data;
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		return data;
+		return vreinterpretq_u32_s32(data);
 		#else
-		return simd_u32_4((uint4)vec);
+		return simd_u32_4((uint4)ints);
 		#endif
 	}
 	
 	/*******************************************************************************************************************
-	 * @brief Loads SIMD 4 component 32bit signed integer aligned vector value.
+	 * @brief Loads SIMD 4 component 32bit signed integer aligned vector values.
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[in] v target 4 component vector value pointer (aligned)
 	 */
-	void loadAligned(const int4* v) noexcept
+	void loadAligned(const int32* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_load_si128((const _simd_i128*)v);
+		data = _mm_load_si128((const __m128i*)v);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vld1q_s32((const int32*)v);
+		data = vld1q_s32(v);
 		#else
-		vec = *v;
+		ints = *((const int4*)v);
 		#endif
 	}
 
 	/**
-	 * @brief Stores SIMD 4 component 32bit signed integer unaligned vector value.
+	 * @brief Stores SIMD 4 component 32bit signed integer unaligned vector values.
 	 * @param[out] v target 4 component vector value pointer (unaligned)
 	 */
-	void store(int4* v) noexcept
+	void store(int32* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		_mm_storeu_si128((_simd_i128*)v, data);
+		_mm_storeu_si128((__m128i*)v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		vst1q_s32((int32*)v, data);
+		vst1q_s32(v, data);
 		#else
-		*v = vec;
+		*((int4*)v) = ints;
 		#endif
 	}
 	/**
-	 * @brief Stores SIMD 4 component 32bit signed integer aligned vector value.
+	 * @brief Stores SIMD 4 component 32bit signed integer aligned vector values.
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[out] v target 4 component vector value pointer (aligned)
 	 */
-	void storeAligned(int4* v) noexcept
+	void storeAligned(int32* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		_mm_store_si128((_simd_i128*)v, data);
+		_mm_store_si128((__m128i*)v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		vst1q_s32((int32*)v, data);
+		vst1q_s32(v, data);
 		#else
-		*v = vec;
+		*((int4*)v) = ints;
 		#endif
 	}
 
@@ -253,7 +273,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_s32(data, 0);
 		#else
-		return vec.x;
+		return ints.x;
 		#endif
 	}
 	/**
@@ -264,7 +284,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_s32(data, 1);
 		#else
-		return vec.y;
+		return ints.y;
 		#endif
 	}
 	/**
@@ -275,7 +295,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_s32(data, 2);
 		#else
-		return vec.z;
+		return ints.z;
 		#endif
 	}
 	/**
@@ -286,7 +306,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_s32(data, 3);
 		#else
-		return vec.w;
+		return ints.w;
 		#endif
 	}
 
@@ -294,22 +314,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 	 * @brief Sets SIMD vector first component value.
 	 * @param value target X vector component value
 	 */
-	void setX(int32 value) noexcept { vec.x = value; }
+	void setX(int32 value) noexcept { ints.x = value; }
 	/**
 	 * @brief Sets SIMD vector second component value.
 	 * @param value target Y vector component value
 	 */
-	void setY(int32 value) noexcept { vec.y = value; }
+	void setY(int32 value) noexcept { ints.y = value; }
 	/**
 	 * @brief Sets SIMD vector third component value.
 	 * @param value target Z vector component value
 	 */
-	void setZ(int32 value) noexcept { vec.z = value; }
+	void setZ(int32 value) noexcept { ints.z = value; }
 	/**
 	 * @brief Sets SIMD vector fourth component value.
 	 * @param value target W vector component value
 	 */
-	void setW(int32 value) noexcept { vec.w = value; }
+	void setW(int32 value) noexcept { ints.w = value; }
 
 	/**
 	 * @brief Swizzles SIMD vector components.
@@ -332,7 +352,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return __builtin_shufflevector(data, data, X, Y, Z, W);
 		#else
-		return simd_i32_4(vec[X], vec[Y], vec[Z], vec[W]);
+		return simd_i32_4(ints[X], ints[Y], ints[Z], ints[W]);
 		#endif
 	}
 
@@ -345,25 +365,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	int32& operator[](psize i) noexcept { return vec[i]; }
+	int32& operator[](psize i) noexcept { return ints[i]; }
 	/**
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	int32 operator[](psize i) const noexcept { return vec[i]; }
+	int32 operator[](psize i) const noexcept { return ints[i]; }
 
 	/**
 	 * @brief Returns SIMD vector as 4 component signed integer vector. (xyzw)
 	 */
-	explicit operator int4() const noexcept { return vec; }
+	explicit operator int4() const noexcept { return ints; }
 	/**
 	 * @brief Returns SIMD vector as 3 component signed integer vector. (xyz)
 	 */
-	explicit operator int3() const noexcept { return (int3)vec; }
+	explicit operator int3() const noexcept { return (int3)ints; }
 	/**
 	 * @brief Returns SIMD vector as 2 component signed integer vector. (xy)
 	 */
-	explicit operator int2() const noexcept { return (int2)vec; }
+	explicit operator int2() const noexcept { return (int2)ints; }
 	/**
 	 * @brief Returns SIMD first vector component value. (x)
 	 */
@@ -377,7 +397,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vaddq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec + v.vec);
+		return simd_i32_4(ints + v.ints);
 		#endif
 	}
 	simd_i32_4 operator-(simd_i32_4 v) const noexcept
@@ -387,7 +407,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vsubq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec - v.vec);
+		return simd_i32_4(ints - v.ints);
 		#endif
 	}
 	simd_i32_4 operator*(simd_i32_4 v) const noexcept
@@ -397,11 +417,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vmulq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec * v.vec);
+		return simd_i32_4(ints * v.ints);
 		#endif
 	}
-	simd_i32_4 operator/(simd_i32_4 v) const noexcept { return simd_i32_4(vec / v.vec); }
-	simd_i32_4 operator%(simd_i32_4 v) const noexcept { return simd_i32_4(vec % v.vec); }
+	simd_i32_4 operator/(simd_i32_4 v) const noexcept { return simd_i32_4(ints / v.ints); }
+	simd_i32_4 operator%(simd_i32_4 v) const noexcept { return simd_i32_4(ints % v.ints); }
 
 	//******************************************************************************************************************
 	simd_i32_4 operator&(simd_i32_4 v) const noexcept
@@ -411,7 +431,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vandq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec & v.vec);
+		return simd_i32_4(ints & v.ints);
 		#endif
 	}
 	simd_i32_4 operator|(simd_i32_4 v) const noexcept
@@ -421,7 +441,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vorrq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec | v.vec);
+		return simd_i32_4(ints | v.ints);
 		#endif
 	}
 	simd_i32_4 operator^(simd_i32_4 v) const noexcept
@@ -431,7 +451,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return veorq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec ^ v.vec);
+		return simd_i32_4(ints ^ v.ints);
 		#endif
 	}
 	simd_i32_4 operator>>(simd_i32_4 v) const noexcept
@@ -441,7 +461,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshlq_s32(data, vnegq_s32(v.data));
 		#else
-		return simd_i32_4(vec >> v.vec);
+		return simd_i32_4(ints >> v.ints);
 		#endif
 	}
 	simd_i32_4 operator<<(simd_i32_4 v) const noexcept
@@ -451,7 +471,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshlq_s32(data, v.data);
 		#else
-		return simd_i32_4(vec << v.vec);
+		return simd_i32_4(ints << v.ints);
 		#endif
 	}
 	simd_i32_4 operator>>(int32 n) const noexcept
@@ -461,17 +481,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshrq_n_s32(data, n);
 		#else
-		return simd_i32_4(vec >> n);
+		return simd_i32_4(ints >> n);
 		#endif
 	}
 	simd_i32_4 operator<<(int32 n) const noexcept
 	{
-		#if defined(MATH_SIMD_SUPPORT_AVX2)
+		#if defined(MATH_SIMD_SUPPORT_SSE)
 		return _mm_slli_epi32(data, n);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshlq_n_s32(data, n);
 		#else
-		return simd_i32_4(vec << n);
+		return simd_i32_4(ints << n);
 		#endif
 	}
 	simd_i32_4 operator-() const noexcept
@@ -481,7 +501,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vnegq_s32(data);
 		#else
-		return simd_i32_4(-vec);
+		return simd_i32_4(-ints);
 		#endif
 	}
 	simd_i32_4 operator!() const noexcept
@@ -491,7 +511,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vandq_s32(vceqq_s32(data, vdupq_n_s32(0)), vdupq_n_s32(1));
 		#else
-		return simd_i32_4(!vec);
+		return simd_i32_4(!ints);
 		#endif
 	}
 	simd_i32_4 operator~() const noexcept
@@ -501,11 +521,20 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vmvnq_s32(data);
 		#else
-		return simd_i32_4(~vec);
+		return simd_i32_4(~ints);
 		#endif
 	}
 
 	//******************************************************************************************************************
+	simd_i32_4 operator+(int32 n) const noexcept { return *this + simd_i32_4(n); }
+	simd_i32_4 operator-(int32 n) const noexcept { return *this - simd_i32_4(n); }
+	simd_i32_4 operator*(int32 n) const noexcept { return *this * simd_i32_4(n); }
+	simd_i32_4 operator/(int32 n) const noexcept { return *this / simd_i32_4(n); }
+	simd_i32_4 operator%(int32 n) const noexcept { return *this % simd_i32_4(n); }
+	simd_i32_4 operator&(int32 n) const noexcept { return *this & simd_i32_4(n); }
+	simd_i32_4 operator|(int32 n) const noexcept { return *this | simd_i32_4(n); }
+	simd_i32_4 operator^(int32 n) const noexcept { return *this ^ simd_i32_4(n); }
+	
 	simd_i32_4& operator+=(simd_i32_4 v) noexcept { *this = *this + v; return *this; }
 	simd_i32_4& operator-=(simd_i32_4 v) noexcept { *this = *this - v; return *this; }
 	simd_i32_4& operator*=(simd_i32_4 v) noexcept { *this = *this * v; return *this; }
@@ -537,7 +566,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vminvq_s32(vceqq_s32(data, v.data)) == 0xFFFFFFFF;
 		#else
-		return vec == v.vec;
+		return ints == v.ints;
 		#endif
 	}
 	bool operator!=(simd_i32_4 v) const noexcept
@@ -547,7 +576,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return !vminvq_s32(vceqq_s32(data, v.data));
 		#else
-		return vec != v.vec;
+		return ints != v.ints;
 		#endif
 	}
 	simd_u32_4 operator<(simd_i32_4 v) const noexcept
@@ -557,7 +586,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcltq_s32(data, v.data);
 		#else
-		return simd_u32_4(vec < v.vec);
+		return simd_u32_4(ints < v.ints);
 		#endif
 	}
 	simd_u32_4 operator>(simd_i32_4 v) const noexcept
@@ -567,7 +596,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgtq_s32(data, v.data);
 		#else
-		return simd_u32_4(vec > v.vec);
+		return simd_u32_4(ints > v.ints);
 		#endif
 	}
 	simd_u32_4 operator<=(simd_i32_4 v) const noexcept
@@ -577,7 +606,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcleq_s32(data, v.data);
 		#else
-		return simd_u32_4(vec <= v.vec);
+		return simd_u32_4(ints <= v.ints);
 		#endif
 	}
 	simd_u32_4 operator>=(simd_i32_4 v) const noexcept
@@ -587,10 +616,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_i32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgeq_s32(data, v.data);
 		#else
-		return simd_u32_4(vec >= v.vec);
+		return simd_u32_4(ints >= v.ints);
 		#endif
 	}
 
+	bool operator==(int32 n) const noexcept { return *this == simd_i32_4(n); }
+	bool operator!=(int32 n) const noexcept { return *this != simd_i32_4(n); }
+	simd_u32_4 operator<(int32 n) const noexcept { return *this < simd_i32_4(n); }
+	simd_u32_4 operator>(int32 n) const noexcept { return *this > simd_i32_4(n); }
+	simd_u32_4 operator<=(int32 n) const noexcept { return *this <= simd_i32_4(n); }
+	simd_u32_4 operator>=(int32 n) const noexcept { return *this >= simd_i32_4(n); }
+
 	static const simd_i32_4 zero, one, minusOne, min, max;
 };
 
@@ -617,22 +653,70 @@ static simd_u32_4 operator>(int32 n, simd_i32_4 v) noexcept { return simd_i32_4(
 static simd_u32_4 operator<=(int32 n, simd_i32_4 v) noexcept { return simd_i32_4(n) <= v; }
 static simd_u32_4 operator>=(int32 n, simd_i32_4 v) noexcept { return simd_i32_4(n) >= v; }
 
+/**
+ * @brief Reinterprets the 4 component unsigned integer SIMD vector as a signed integer one. (Doesn't change the bits)
+ */
+static simd_i32_4 uintAsInt(simd_u32_4 v) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return v.data;
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vreinterpretq_s32_u32(v.data); // vreinterpretq_<dst>_<src>: u32 lanes viewed as s32
+	#else
+	return *((const simd_i32_4*)&v);
+	#endif
+}
+
 /***********************************************************************************************************************
  * @brief Compares two SIMD vectors component wise if they are equal.
  *
  * @param a first SIMD vector to compare
  * @param b second SIMD vector to compare
  */
-static simd_i32_4 compare(simd_i32_4 a, simd_i32_4 b) noexcept
+static simd_u32_4 equal(simd_i32_4 a, simd_i32_4 b) noexcept
 {
 	#if defined(MATH_SIMD_SUPPORT_SSE)
 	return _mm_cmpeq_epi32(a.data, b.data);
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vceqq_s32(a.data, b.data);
 	#else
-	return simd_i32_4(compare(a.vec, b.vec));
+	return simd_u32_4(equal(a.ints, b.ints));
+	#endif
+}
+/**
+ * @brief Tests two SIMD vectors for inequality, component wise.
+ *
+ * @param a left hand side SIMD vector
+ * @param b right hand side SIMD vector
+ */
+static simd_u32_4 notEqual(simd_i32_4 a, simd_i32_4 b) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_xor_si128(_mm_cmpeq_epi32(a.data, b.data), _mm_set1_epi32(-1)); // invert the equality mask
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vmvnq_u32(vceqq_s32(a.data, b.data));
+	#else
+	return simd_u32_4(notEqual(a.ints, b.ints));
 	#endif
 }
+
+/**
+ * @brief Clears in the first SIMD vector every bit that is set in the second one. [r = a & (~b)]
+ *
+ * @param a SIMD vector to clear bits in
+ * @param b SIMD vector with the bits to clear
+ */
+static simd_i32_4 bic(simd_i32_4 a, simd_i32_4 b) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_andnot_si128(b.data, a.data); // flipped: the intrinsic inverts its first argument
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vbicq_s32(a.data, b.data);
+	#else
+	return simd_i32_4(a.ints & (~b.ints));
+	#endif
+}
+
 /**
  * @brief Returns true if first SIMD vector binary representation is less than the second.
  *
@@ -660,7 +744,7 @@ static simd_i32_4 select(simd_u32_4 c, simd_i32_4 t, simd_i32_4 f) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vbslq_s32(vshrq_n_s32(vreinterpretq_s32_u32(c.data), 31), t.data, f.data);
 	#else
-	return simd_i32_4(select(c.vec, t.vec, f.vec));
+	return simd_i32_4(select(c.ints, t.ints, f.ints));
 	#endif
 }
 
@@ -677,7 +761,7 @@ static simd_i32_4 min(simd_i32_4 a, simd_i32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vminq_s32(a.data, b.data);
 	#else
-	return simd_i32_4(min(a.vec, b.vec));
+	return simd_i32_4(min(a.ints, b.ints));
 	#endif
 }
 /**
@@ -693,7 +777,7 @@ static simd_i32_4 max(simd_i32_4 a, simd_i32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vmaxq_s32(a.data, b.data);
 	#else
-	return simd_i32_4(max(a.vec, b.vec));
+	return simd_i32_4(max(a.ints, b.ints));
 	#endif
 }
 
diff --git a/include/math/simd/vector/uint.hpp b/include/math/simd/vector/uint.hpp
index 90da3ec..ca660c1 100644
--- a/include/math/simd/vector/uint.hpp
+++ b/include/math/simd/vector/uint.hpp
@@ -39,7 +39,9 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	union
 	{
 		_simd_u128 data; /** Packed SIMD 4 component 32bit unsigned integer vector data. */
-		uint4 vec;
+		uint4 uints;
+		int4 ints;
+		float4 floats;
 	};
 
 	simd_u32_4(const simd_u32_4& v) = default;
@@ -55,7 +57,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_u32(0);
 		#else
-		vec = uint4::zero;
+		uints = uint4::zero;
 		#endif
 	}
 	/**
@@ -69,7 +71,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vdupq_n_u32(xyzw);
 		#else
-		vec = uint4(xyzw);
+		uints = uint4(xyzw);
 		#endif
 	}
 	/**
@@ -87,7 +89,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { x, y, z, w };
 		#else
-		vec = uint4(x, y, z, w);
+		uints = uint4(x, y, z, w);
+		#endif
+	}
+	/**
+	 * @brief Creates a new SIMD 4 component 32bit unsigned integer vector structure from 3 component values.
+	 * @warning This constructor duplicates Z component to the W component!
+	 *
+	 * @param x first vector component value
+	 * @param y second vector component value
+	 * @param z third vector component value
+	 */
+	simd_u32_4(uint32 x, uint32 y, uint32 z) noexcept
+	{
+		#if defined(MATH_SIMD_SUPPORT_SSE)
+		data = _mm_set_epi32((int)z, (int)z, (int)y, (int)x); // arguments go from the W lane down to X
+		#elif defined(MATH_SIMD_SUPPORT_NEON)
+		data = { x, y, z, z };
+		#else
+		uints = uint4(x, y, z, z);
 		#endif
 	}
 	/**
@@ -98,14 +118,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 */
 	simd_u32_4(simd_u32_4 xyz, uint32 w) noexcept
 	{
-		#if defined(MATH_SIMD_SUPPORT_SSE4_1)
+		#if defined(MATH_SIMD_SUPPORT_AVX2)
 		data = _mm_blend_epi32(xyz.data, _mm_set1_epi32((int)w), 8);
 		#elif defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_set_epi32((int)w, (int)xyz.vec.z, (int)xyz.vec.y, (int)xyz.vec.x);
+		data = _mm_set_epi32((int)w, (int)xyz.uints.z, (int)xyz.uints.y, (int)xyz.uints.x);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = vsetq_lane_u32(w, xyz.data, 3);
 		#else
-		vec = xyz.vec; vec.w = w;
+		uints = xyz.uints; uints.w = w;
 		#endif
 	}
 
@@ -128,7 +148,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.w };
 		#else
-		vec = v;
+		uints = v;
 		#endif
 	}
 	/**
@@ -142,7 +162,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.z, v.z };
 		#else
-		vec = uint4(v, v.z);
+		uints = uint4(v, v.z);
 		#endif
 	}
 	/**
@@ -156,7 +176,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		data = { v.x, v.y, v.y, v.y };
 		#else
-		vec = uint4(v, v.y, v.y);
+		uints = uint4(v, v.y, v.y);
 		#endif
 	}
 	/**
@@ -166,11 +186,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	explicit simd_u32_4(const uint4* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_loadu_si128((const _simd_u128*)v);
+		data = _mm_loadu_si128((const __m128i*)v);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vld1q_u32((const uint32*)v);
+		data = vld1q_u32((const uint32*)v); // v points to uint4, the intrinsic expects uint32*
 		#else
-		vec = *v;
+		uints = *v;
 		#endif
 	}
 	
@@ -179,14 +199,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[in] v target 4 component vector value pointer (aligned)
 	 */
-	void loadAligned(const uint4* v) noexcept
+	void loadAligned(const uint32* v) noexcept
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		data = _mm_load_si128((const _simd_u128*)v);
+		data = _mm_load_si128((const __m128i*)v);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		data = vld1q_u32((const uint32*)v);
+		data = vld1q_u32(v);
 		#else
-		vec = *v;
+		uints = *((const uint4*)v);
 		#endif
 	}
 
@@ -194,14 +214,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 * @brief Stores SIMD 4 component 32bit unsigned integer unaligned vector value.
 	 * @param[out] v target 4 component vector value pointer (unaligned)
 	 */
-	void store(uint4* v) noexcept
+	void store(uint32* v) const noexcept // const: only reads the vector data
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
-		_mm_storeu_si128((_simd_u128*)v, data);
+		_mm_storeu_si128((__m128i*)v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		vst1q_u32((uint32*)v, data);
+		vst1q_u32(v, data);
 		#else
-		*v = vec;
+		*((uint4*)v) = uints;
 		#endif
 	}
 	/**
@@ -209,14 +229,14 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 * @warning Specified vector pointer must be aligned in the memory!!!
 	 * @param[out] v target 4 component vector value pointer (aligned)
 	 */
-	void storeAligned(uint4* v) noexcept
+	void storeAligned(uint32* v) const noexcept // const: only reads the vector data
 	{
 		#if defined(MATH_SIMD_SUPPORT_SSE)
 		_mm_store_si128((_simd_u128*)v, data);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		vst1q_u32((uint32*)v, data);
 		#else
-		*v = vec;
+		*((uint4*)v) = uints;
 		#endif
 	}
 
@@ -230,7 +250,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_u32(data, 0);
 		#else
-		return vec.x;
+		return uints.x;
 		#endif
 	}
 	/**
@@ -241,7 +261,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_u32(data, 1);
 		#else
-		return vec.y;
+		return uints.y;
 		#endif
 	}
 	/**
@@ -252,7 +272,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_u32(data, 2);
 		#else
-		return vec.z;
+		return uints.z;
 		#endif
 	}
 	/**
@@ -263,7 +283,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#if defined(MATH_SIMD_SUPPORT_NEON)
 		return vgetq_lane_u32(data, 3);
 		#else
-		return vec.w;
+		return uints.w;
 		#endif
 	}
 
@@ -271,22 +291,22 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 * @brief Sets SIMD vector first component value.
 	 * @param value target X vector component value
 	 */
-	void setX(uint32 value) noexcept { vec.x = value; }
+	void setX(uint32 value) noexcept { uints.x = value; }
 	/**
 	 * @brief Sets SIMD vector second component value.
 	 * @param value target Y vector component value
 	 */
-	void setY(uint32 value) noexcept { vec.y = value; }
+	void setY(uint32 value) noexcept { uints.y = value; }
 	/**
 	 * @brief Sets SIMD vector third component value.
 	 * @param value target Z vector component value
 	 */
-	void setZ(uint32 value) noexcept { vec.z = value; }
+	void setZ(uint32 value) noexcept { uints.z = value; }
 	/**
 	 * @brief Sets SIMD vector fourth component value.
 	 * @param value target W vector component value
 	 */
-	void setW(uint32 value) noexcept { vec.w = value; }
+	void setW(uint32 value) noexcept { uints.w = value; }
 
 	/**
 	 * @brief Swizzles SIMD vector components.
@@ -309,7 +329,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return __builtin_shufflevector(data, data, X, Y, Z, W);
 		#else
-		return simd_u32_4(vec[X], vec[Y], vec[Z], vec[W]);
+		return simd_u32_4(uints[X], uints[Y], uints[Z], uints[W]);
 		#endif
 	}
 
@@ -322,25 +342,25 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	uint32& operator[](psize i) noexcept { return vec[i]; }
+	uint32& operator[](psize i) noexcept { return uints[i]; }
 	/**
 	 * @brief Returns SIMD vector component by index.
 	 * @param i target component index
 	 */
-	uint32 operator[](psize i) const noexcept { return vec[i]; }
+	uint32 operator[](psize i) const noexcept { return uints[i]; }
 
 	/**
 	 * @brief Returns SIMD vector as 4 component unsigned integer vector. (xyzw)
 	 */
-	explicit operator uint4() const noexcept { return vec; }
+	explicit operator uint4() const noexcept { return uints; }
 	/**
 	 * @brief Returns SIMD vector as 3 component unsigned integer vector. (xyz)
 	 */
-	explicit operator uint3() const noexcept { return (uint3)vec; }
+	explicit operator uint3() const noexcept { return (uint3)uints; }
 	/**
 	 * @brief Returns SIMD vector as 2 component unsigned integer vector. (xy)
 	 */
-	explicit operator uint2() const noexcept { return (uint2)vec; }
+	explicit operator uint2() const noexcept { return (uint2)uints; }
 	/**
 	 * @brief Returns SIMD first vector component value. (x)
 	 */
@@ -354,7 +374,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vaddq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec + v.vec);
+		return simd_u32_4(uints + v.uints);
 		#endif
 	}
 	simd_u32_4 operator-(simd_u32_4 v) const noexcept
@@ -364,7 +384,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vsubq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec - v.vec);
+		return simd_u32_4(uints - v.uints);
 		#endif
 	}
 	simd_u32_4 operator*(simd_u32_4 v) const noexcept
@@ -374,11 +394,11 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vmulq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec * v.vec);
+		return simd_u32_4(uints * v.uints);
 		#endif
 	}
-	simd_u32_4 operator/(simd_u32_4 v) const noexcept { return simd_u32_4(vec / v.vec); }
-	simd_u32_4 operator%(simd_u32_4 v) const noexcept { return simd_u32_4(vec % v.vec); }
+	simd_u32_4 operator/(simd_u32_4 v) const noexcept { return simd_u32_4(uints / v.uints); }
+	simd_u32_4 operator%(simd_u32_4 v) const noexcept { return simd_u32_4(uints % v.uints); }
 
 	//******************************************************************************************************************
 	simd_u32_4 operator&(simd_u32_4 v) const noexcept
@@ -388,7 +408,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vandq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec & v.vec);
+		return simd_u32_4(uints & v.uints);
 		#endif
 	}
 	simd_u32_4 operator|(simd_u32_4 v) const noexcept
@@ -398,7 +418,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vorrq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec | v.vec);
+		return simd_u32_4(uints | v.uints);
 		#endif
 	}
 	simd_u32_4 operator^(simd_u32_4 v) const noexcept
@@ -408,7 +428,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return veorq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec ^ v.vec);
+		return simd_u32_4(uints ^ v.uints);
 		#endif
 	}
 	simd_u32_4 operator>>(simd_u32_4 v) const noexcept
@@ -418,7 +438,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshlq_u32(data, vnegq_s32(vreinterpretq_s32_u32(v.data)));
 		#else
-		return simd_u32_4(vec >> v.vec);
+		return simd_u32_4(uints >> v.uints);
 		#endif
 	}
 	simd_u32_4 operator<<(simd_u32_4 v) const noexcept
@@ -428,7 +448,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshlq_u32(data, vreinterpretq_s32_u32(v.data));
 		#else
-		return simd_u32_4(vec << v.vec);
+		return simd_u32_4(uints << v.uints);
 		#endif
 	}
 	simd_u32_4 operator>>(uint32 n) const noexcept
@@ -438,17 +458,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vshrq_n_u32(data, n);
 		#else
-		return simd_u32_4(vec >> n);
+		return simd_u32_4(uints >> n);
 		#endif
 	}
 	simd_u32_4 operator<<(uint32 n) const noexcept
 	{
-		#if defined(MATH_SIMD_SUPPORT_AVX2)
+		#if defined(MATH_SIMD_SUPPORT_SSE)
 		return _mm_slli_epi32(data, n);
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
-		return vshlq_n_u32(data, n);
+		return vshlq_u32(data, vdupq_n_s32((int)n)); // vshlq_n_u32 needs a compile-time constant shift
 		#else
-		return simd_u32_4(vec << n);
+		return simd_u32_4(uints << n);
 		#endif
 	}
 	simd_u32_4 operator!() const noexcept
@@ -458,7 +478,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vandq_u32(vceqq_u32(data, vdupq_n_u32(0)), vdupq_n_u32(1));
 		#else
-		return simd_u32_4(!vec);
+		return simd_u32_4(!uints);
 		#endif
 	}
 	simd_u32_4 operator~() const noexcept
@@ -468,11 +488,20 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vmvnq_u32(data);
 		#else
-		return simd_u32_4(~vec);
+		return simd_u32_4(~uints);
 		#endif
 	}
 
 	//******************************************************************************************************************
+	simd_u32_4 operator+(uint32 n) const noexcept { return *this + simd_u32_4(n); }
+	simd_u32_4 operator-(uint32 n) const noexcept { return *this - simd_u32_4(n); }
+	simd_u32_4 operator*(uint32 n) const noexcept { return *this * simd_u32_4(n); }
+	simd_u32_4 operator/(uint32 n) const noexcept { return *this / simd_u32_4(n); }
+	simd_u32_4 operator%(uint32 n) const noexcept { return *this % simd_u32_4(n); }
+	simd_u32_4 operator&(uint32 n) const noexcept { return *this & simd_u32_4(n); }
+	simd_u32_4 operator|(uint32 n) const noexcept { return *this | simd_u32_4(n); }
+	simd_u32_4 operator^(uint32 n) const noexcept { return *this ^ simd_u32_4(n); }
+
 	simd_u32_4& operator+=(simd_u32_4 v) noexcept { *this = *this + v; return *this; }
 	simd_u32_4& operator-=(simd_u32_4 v) noexcept { *this = *this - v; return *this; }
 	simd_u32_4& operator*=(simd_u32_4 v) noexcept { *this = *this * v; return *this; }
@@ -504,7 +533,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vminvq_u32(vceqq_u32(data, v.data)) == 0xFFFFFFFFu;
 		#else
-		return vec == v.vec;
+		return uints == v.uints;
 		#endif
 	}
 	bool operator!=(simd_u32_4 v) const noexcept
@@ -514,7 +543,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return !vminvq_u32(vceqq_u32(data, v.data));
 		#else
-		return vec != v.vec;
+		return uints != v.uints;
 		#endif
 	}
 	simd_u32_4 operator<(simd_u32_4 v) const noexcept
@@ -524,7 +553,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcltq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec < v.vec);
+		return simd_u32_4(uints < v.uints);
 		#endif
 	}
 	simd_u32_4 operator>(simd_u32_4 v) const noexcept
@@ -534,7 +563,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgtq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec > v.vec);
+		return simd_u32_4(uints > v.uints);
 		#endif
 	}
 	simd_u32_4 operator<=(simd_u32_4 v) const noexcept
@@ -544,7 +573,7 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcleq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec <= v.vec);
+		return simd_u32_4(uints <= v.uints);
 		#endif
 	}
 	simd_u32_4 operator>=(simd_u32_4 v) const noexcept
@@ -554,10 +583,17 @@ struct [[nodiscard]] alignas(MATH_SIMD_VECTOR_ALIGNMENT) simd_u32_4
 		#elif defined(MATH_SIMD_SUPPORT_NEON)
 		return vcgeq_u32(data, v.data);
 		#else
-		return simd_u32_4(vec >= v.vec);
+		return simd_u32_4(uints >= v.uints);
 		#endif
 	}
 
+	bool operator==(uint32 n) const noexcept { return *this == simd_u32_4(n); }
+	bool operator!=(uint32 n) const noexcept { return *this != simd_u32_4(n); }
+	simd_u32_4 operator<(uint32 n) const noexcept { return *this < simd_u32_4(n); }
+	simd_u32_4 operator>(uint32 n) const noexcept { return *this > simd_u32_4(n); }
+	simd_u32_4 operator<=(uint32 n) const noexcept { return *this <= simd_u32_4(n); }
+	simd_u32_4 operator>=(uint32 n) const noexcept { return *this >= simd_u32_4(n); }
+
 	static const simd_u32_4 zero, one, max;
 };
 
@@ -593,7 +629,7 @@ static int getTrues(simd_u32_4 v) noexcept
 	int32x4_t shift = { 0, 1, 2, 3 };
 	return vaddvq_u32(vshlq_u32(vshrq_n_u32(v.data, 31), shift));
 	#else
-	return getTrues(v.vec);
+	return getTrues(v.uints);
 	#endif
 }
 /**
@@ -606,7 +642,7 @@ static bool areAllTrue(simd_u32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vminvq_u32(v.data) == 0xFFFFFFFFu;
 	#else
-	return areAllTrue(v.vec);
+	return areAllTrue(v.uints);
 	#endif
 }
 /**
@@ -621,7 +657,7 @@ static bool areAllFalse(simd_u32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return !vmaxvq_u32(v.data);
 	#else
-	return areAllFalse(v.vec);
+	return areAllFalse(v.uints);
 	#endif
 }
 /**
@@ -636,7 +672,7 @@ static bool areAnyTrue(simd_u32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vmaxvq_u32(v.data);
 	#else
-	return areAnyTrue(v.vec);
+	return areAnyTrue(v.uints);
 	#endif
 }
 /**
@@ -649,7 +685,7 @@ static bool areAnyFalse(simd_u32_4 v) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vminvq_u32(v.data) != 0xFFFFFFFFu;
 	#else
-	return areAnyFalse(v.vec);
+	return areAnyFalse(v.uints);
 	#endif
 }
 
@@ -659,16 +695,50 @@ static bool areAnyFalse(simd_u32_4 v) noexcept
  * @param a first SIMD vector to compare
  * @param b second SIMD vector to compare
  */
-static simd_u32_4 compare(simd_u32_4 a, simd_u32_4 b) noexcept
+static simd_u32_4 equal(simd_u32_4 a, simd_u32_4 b) noexcept
 {
 	#if defined(MATH_SIMD_SUPPORT_SSE)
 	return _mm_cmpeq_epi32(a.data, b.data);
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vceqq_u32(a.data, b.data);
 	#else
-	return simd_u32_4(compare(a.vec, b.vec));
+	return simd_u32_4(equal(a.uints, b.uints));
+	#endif
+}
+/**
+ * @brief Tests two SIMD vectors for inequality, component wise.
+ *
+ * @param a left hand side SIMD vector
+ * @param b right hand side SIMD vector
+ */
+static simd_u32_4 notEqual(simd_u32_4 a, simd_u32_4 b) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_xor_si128(_mm_cmpeq_epi32(a.data, b.data), _mm_set1_epi32(-1)); // invert the equality mask
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vmvnq_u32(vceqq_u32(a.data, b.data));
+	#else
+	return simd_u32_4(notEqual(a.uints, b.uints));
 	#endif
 }
+
+/**
+ * @brief Clears in the first SIMD vector every bit that is set in the second one. [r = a & (~b)]
+ *
+ * @param a SIMD vector to clear bits in
+ * @param b SIMD vector with the bits to clear
+ */
+static simd_u32_4 bic(simd_u32_4 a, simd_u32_4 b) noexcept
+{
+	#if defined(MATH_SIMD_SUPPORT_SSE)
+	return _mm_andnot_si128(b.data, a.data); // flipped: the intrinsic inverts its first argument
+	#elif defined(MATH_SIMD_SUPPORT_NEON)
+	return vbicq_u32(a.data, b.data);
+	#else
+	return simd_u32_4(a.uints & (~b.uints));
+	#endif
+}
+
 /**
  * @brief Returns true if first SIMD vector binary representation is less than the second.
  *
@@ -697,7 +767,7 @@ static simd_u32_4 select(simd_u32_4 c, simd_u32_4 t, simd_u32_4 f) noexcept
 	return vbslq_u32(vreinterpretq_u32_s32(vshrq_n_s32(
 		vreinterpretq_s32_u32(c.data), 31)), t.data, f.data);
 	#else
-	return simd_u32_4(select(c.vec, t.vec, f.vec));
+	return simd_u32_4(select(c.uints, t.uints, f.uints));
 	#endif
 }
 
@@ -714,7 +784,7 @@ static simd_u32_4 min(simd_u32_4 a, simd_u32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vminq_u32(a.data, b.data);
 	#else
-	return simd_u32_4(min(a.vec, b.vec));
+	return simd_u32_4(min(a.uints, b.uints));
 	#endif
 }
 /**
@@ -730,7 +800,7 @@ static simd_u32_4 max(simd_u32_4 a, simd_u32_4 b) noexcept
 	#elif defined(MATH_SIMD_SUPPORT_NEON)
 	return vmaxq_u32(a.data, b.data);
 	#else
-	return simd_u32_4(max(a.vec, b.vec));
+	return simd_u32_4(max(a.uints, b.uints));
 	#endif
 }
 
diff --git a/include/math/sphere.hpp b/include/math/sphere.hpp
index 9b8a5c0..28a780c 100644
--- a/include/math/sphere.hpp
+++ b/include/math/sphere.hpp
@@ -30,17 +30,37 @@ namespace math
  */
 struct [[nodiscard]] Sphere
 {
-	float3 position;
-	float radius;
-
+protected:
+	simd_f32_4 posRad; // xyz - sphere position, w - sphere radius
+public:
 	/**
 	 * @brief Creates a new sphere structure.
 	 *
 	 * @param radius target sphere radius
-	 * @param[in] position sphere position in 3D space
+	 * @param position sphere position in 3D space
+	 */
+	Sphere(float radius = 0.0f, simd_f32_4 position = simd_f32_4::zero) noexcept :
+		posRad(position) { posRad.setW(radius); }
+
+	/**
+	 * @brief Returns sphere radius.
+	 */
+	float getRadius() const noexcept { return posRad.getW(); }
+	/**
+	 * @brief Sets sphere radius.
+	 * @param radius target sphere radius
+	 */
+	void setRadius(float radius) noexcept { posRad.setW(radius); }
+	
+	/**
+	 * @brief Returns position of the sphere in 3D space. (W component contains the sphere radius)
+	 */
+	const simd_f32_4& getPosition() const noexcept { return posRad; }
+	/**
+	 * @brief Sets position of the sphere in 3D space.
+	 * @param position target sphere position in 3D space
 	 */
-	constexpr Sphere(float radius = 0.0f, const float3& position = float3::zero) noexcept :
-		position(position), radius(radius) { }
+	void setPosition(simd_f32_4 position) noexcept { posRad = simd_f32_4(position, posRad.getW()); }
 
 	static const Sphere one, two, half;
 };
@@ -50,27 +70,28 @@ inline const Sphere Sphere::two = Sphere(1.0f);
 inline const Sphere Sphere::half = Sphere(0.25f);
 
 /**
- * @brief Returns true if point is inside the sphere.
+ * @brief Returns true if point is inside the sphere in 3D space.
  *
  * @param[in] sphere target sphere to check
- * @param[in] point target point in the space
+ * @param point target point in 3D space
  */
-static constexpr bool isInside(const Sphere& sphere, const float3& point) noexcept
+static bool isInside(const Sphere& sphere, simd_f32_4 point) noexcept
 {
-	auto difference = sphere.position - point;
-  	return length2(difference) < sphere.radius * sphere.radius;
+	const auto radius = sphere.getRadius();
+	const auto offset = sphere.getPosition() - point;
+	return lengthSq3(offset) < radius * radius;
 }
 
 /**
- * @brief Calculates closest point on sphere to the specified one.
+ * @brief Calculates closest point on sphere to the specified one in 3D space.
  *
  * @param[in] sphere target sphere to use
- * @param[in] point target point in 3D space
+ * @param point target point in 3D space
  */
-static float3 closestPoint(const Sphere& sphere, const float3& point) noexcept
+static simd_f32_4 closestPoint(const Sphere& sphere, simd_f32_4 point) noexcept
 {
-    auto sphereToPoint = normalize(point - sphere.position);
-    return sphere.position + sphereToPoint * sphere.radius;
+	const auto direction = normalize3(point - sphere.getPosition());
+	return sphere.getPosition() + direction * sphere.getRadius();
 }
 
 /**
@@ -79,11 +100,11 @@ static float3 closestPoint(const Sphere& sphere, const float3& point) noexcept
  * @param[in] a first sphere to check
  * @param[in] b second sphere to check
  */
-static constexpr bool isIntersected(const Sphere& a, const Sphere& b) noexcept
+static bool isIntersected(const Sphere& a, const Sphere& b) noexcept
 {
-    auto d = a.position - b.position;
-    auto s = a.radius + b.radius;
-    return length2(d) <= s * s;
+	const auto radiusSum = a.getRadius() + b.getRadius();
+	const auto centerDelta = a.getPosition() - b.getPosition();
+	return lengthSq3(centerDelta) <= radiusSum * radiusSum;
 }
 /**
  * @brief Returns true if sphere intersects AABB.
@@ -93,9 +114,9 @@ static constexpr bool isIntersected(const Sphere& a, const Sphere& b) noexcept
  */
 static bool isIntersected(const Sphere& a, const Aabb& b) noexcept
 {
-	auto c = closestPoint(b, a.position);
-	auto d2 = length2(a.position - c);
-	return d2 < a.radius * a.radius;
+	const auto radius = a.getRadius();
+	const auto nearest = closestPoint(b, a.getPosition());
+	return lengthSq3(a.getPosition() - nearest) < radius * radius;
 }
 
 } // namespace math
\ No newline at end of file
diff --git a/include/math/tone-mapping.hpp b/include/math/tone-mapping.hpp
index 078663f..5575d95 100644
--- a/include/math/tone-mapping.hpp
+++ b/include/math/tone-mapping.hpp
@@ -34,28 +34,28 @@ namespace math
 constexpr float defaultGamma = 2.2f;
 
 /**
- * @brief Applies gamma correction to the specified color.
+ * @brief Applies gamma correction to the specified color. (Fast approximation)
  * 
  * @details
  * Gamma correction is used to adjust the brightness of an image or display to match the 
  * nonlinear response of display devices, such as monitors. It compensates for the fact that 
  * displays do not linearly represent the light intensity of a color.
  * 
- * @param[in] color target color to gamma correct
+ * @param color target color to gamma correct
  * @param invGamma inverse gamma correction value (1.0/x)
  */
-static float3 gammaCorrection(const float3& color, float invGamma) noexcept
+static simd_f32_4 fastGammaCorrection(simd_f32_4 color, float invGamma) noexcept
 {
-	return pow(color, float3(invGamma));
+	return simd_f32_4(fastPow(color, simd_f32_4(invGamma)), color.getW()); // W is taken from the input as is
 }
 /**
- * @brief Applies gamma correction to the specified color.
+ * @brief Applies gamma correction to the specified color using @ref defaultGamma. (Fast approximation)
- * @details See the @ref gammaCorrection().
+ * @details See the @ref fastGammaCorrection().
- * @param[in] color target color to gamma correct
+ * @param color target color to gamma correct
  */
-static float3 gammaCorrection(const float3& color) noexcept
+static simd_f32_4 fastGammaCorrection(simd_f32_4 color) noexcept
 {
-	return pow(color, float3(1.0f / defaultGamma));
+	return fastGammaCorrection(color, 1.0f / defaultGamma); // same math as the explicit overload
 }
 
 } // namespace math
\ No newline at end of file
diff --git a/include/math/triangle.hpp b/include/math/triangle.hpp
index 7a44d89..1d32bf3 100644
--- a/include/math/triangle.hpp
+++ b/include/math/triangle.hpp
@@ -40,7 +40,7 @@ struct [[nodiscard]] Triangle
 	simd_f32_4 p2 = simd_f32_4::zero; /**< Third triangle vertex position. */
 
 	/**
-	 * @brief Creates a new triangle (polygon) structure.
+	 * @brief Creates a new triangle (polygon) structure. (In 3D space)
 	 *
 	 * @param p0 first triangle vertex position in 3D space
 	 * @param p1 second triangle vertex position in 3D space
@@ -63,10 +63,10 @@ struct [[nodiscard]] Triangle
 };
 
 /**
- * @brief Returns true if point is inside the triangle.
+ * @brief Returns true if point is inside the triangle in 3D space.
  *
- * @param[in] sphere target triangle to check
+ * @param[in] triangle target triangle to check
- * @param[in] point target point in the space
+ * @param point target point in 3D space
  */
 static bool isInside(const Triangle& triangle, simd_f32_4 point) noexcept
 {
@@ -78,11 +78,11 @@ static bool isInside(const Triangle& triangle, simd_f32_4 point) noexcept
 }
 
 /**
- * @brief Returns triangle barycentric.
+ * @brief Returns triangle barycentric in 3D space.
  * @details Way of expressing the position of a point relative to a triangle.
  *
  * @param[in] triangle target triangle to use
- * @param[in] point target point in space
+ * @param point target point in 3D space
  */
 static simd_f32_4 calcBarycentric(const Triangle& triangle, simd_f32_4 point) noexcept
 {
@@ -91,7 +91,7 @@ static simd_f32_4 calcBarycentric(const Triangle& triangle, simd_f32_4 point) no
 	auto v0 = triangle.p1 - p0, v1 = triangle.p2 - p0, v2 = point - p0;
 	auto d00 = dot3(v0, v0), d01 = dot3(v0, v1), d11 = dot3(v1, v1);
 	auto d20 = dot3(v2, v0), d21 = dot3(v2, v1);
-	auto invDenom = 1.0f / (d00 * d11 - d01 * d01); // TODO: maybe use vectors for these calculations?
+	auto invDenom = 1.0f / (d00 * d11 - d01 * d01);
 	auto v = (d11 * d20 - d01 * d21) * invDenom;
 	auto w = (d00 * d21 - d01 * d20) * invDenom;
 	return simd_f32_4(1.0f - v - w, v, w, 0.0f);
diff --git a/include/math/vector/float.hpp b/include/math/vector/float.hpp
index 6d62355..266a60f 100644
--- a/include/math/vector/float.hpp
+++ b/include/math/vector/float.hpp
@@ -535,10 +535,21 @@ static string toString(float2 v) { return to_string(v.x) + " " + to_string(v.y);
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint2 compare(float2 a, float4 b) noexcept
+static uint2 equal(float2 a, float2 b) noexcept
 {
 	return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise if they are not equal.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint2 notEqual(float2 a, float2 b) noexcept
+{
+	return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -708,7 +719,9 @@ static constexpr bool isClose(float2 a, float2 b, float maxDistSq = 1.0e-12f) no
 static float2 normalize(float2 v) noexcept { return v * (1.0f / length(v)); }
 /**
  * @brief Returns true if vector is normalized with specified tolerance.
+ * 
  * @param v target vector to check
+ * @param tolerance floating point precision tolerance
  */
 static bool isNormalized(float2 v, float tolerance = 1.0e-6f) noexcept
 {
@@ -787,10 +800,21 @@ static string toString(float3 v) { return to_string(v.x) + " " + to_string(v.y)
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint3 compare(float3 a, float3 b) noexcept
+static uint3 equal(float3 a, float3 b) noexcept
 {
 	return uint3(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint3 notEqual(float3 a, float3 b) noexcept
+{
+	return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -979,7 +1003,9 @@ static constexpr bool isClose(float3 a, float3 b, float maxDistSq = 1.0e-12f) no
 static float3 normalize(float3 v) noexcept { return v * (1.0f / length(v)); }
 /**
  * @brief Returns true if vector is normalized with specified tolerance.
+ * 
  * @param v target vector to check
+ * @param tolerance floating point precision tolerance
  */
 static bool isNormalized(float3 v, float tolerance = 1.0e-6f) noexcept
 {
@@ -1064,11 +1090,23 @@ static string toString(float4 v)
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint4 compare(float4 a, float4 b) noexcept
+static uint4 equal(float4 a, float4 b) noexcept
 {
 	return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, 
 		a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint4 notEqual(float4 a, float4 b) noexcept
+{
+	return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, 
+		a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -1266,7 +1304,9 @@ static constexpr bool isClose(float4 a, float4 b, float maxDistSq = 1.0e-12f) no
 static float4 normalize(float4 v) noexcept { return v * (1.0f / length(v)); }
 /**
  * @brief Returns true if vector is normalized with specified tolerance.
+ * 
  * @param v target vector to check
+ * @param tolerance floating point precision tolerance
  */
 static bool isNormalized(float4 v, float tolerance = 1.0e-6f) noexcept
 {
diff --git a/include/math/vector/int.hpp b/include/math/vector/int.hpp
index ce7748a..5f60ace 100644
--- a/include/math/vector/int.hpp
+++ b/include/math/vector/int.hpp
@@ -566,10 +566,21 @@ static string toString(int2 v) { return to_string(v.x) + " " + to_string(v.y); }
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint2 compare(int2 a, int2 b) noexcept
+static uint2 equal(int2 a, int2 b) noexcept
 {
 	return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint2 notEqual(int2 a, int2 b) noexcept
+{
+	return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -663,10 +674,21 @@ static string toString(int3 v) { return to_string(v.x) + " " + to_string(v.y) +
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint3 compare(int3 a, int3 b) noexcept
+static uint3 equal(int3 a, int3 b) noexcept
 {
 	return uint3(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint3 notEqual(int3 a, int3 b) noexcept
+{
+	return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -769,11 +791,23 @@ static string toString(int4 v)
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint4 compare(int4 a, int4 b) noexcept
+static uint4 equal(int4 a, int4 b) noexcept
 {
 	return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, 
 		a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint4 notEqual(int4 a, int4 b) noexcept
+{
+	return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, 
+		a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
diff --git a/include/math/vector/uint.hpp b/include/math/vector/uint.hpp
index 5dfd2b3..7032ee5 100644
--- a/include/math/vector/uint.hpp
+++ b/include/math/vector/uint.hpp
@@ -539,10 +539,21 @@ static constexpr bool areAnyFalse(uint2 v) noexcept { return (v.x & v.y) != UINT
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint2 compare(uint2 a, uint2 b) noexcept
+static uint2 equal(uint2 a, uint2 b) noexcept
 {
 	return uint2(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint2 notEqual(uint2 a, uint2 b) noexcept
+{
+	return uint2(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -666,10 +677,21 @@ static constexpr bool areAnyFalse(uint3 v) noexcept { return (v.x & v.y & v.z) !
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint3 compare(uint3 a, uint3 b) noexcept
+static uint3 equal(uint3 a, uint3 b) noexcept
 {
 	return uint3(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, a.z == b.z ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint3 notEqual(uint3 a, uint3 b) noexcept
+{
+	return uint3(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, a.z != b.z ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
@@ -796,11 +818,23 @@ static constexpr bool areAnyFalse(uint4 v) noexcept { return (v.x & v.y & v.z &
  * @param a first vector to compare
  * @param b second vector to compare
  */
-static uint4 compare(uint4 a, uint4 b) noexcept
+static uint4 equal(uint4 a, uint4 b) noexcept
 {
 	return uint4(a.x == b.x ? UINT32_MAX : 0, a.y == b.y ? UINT32_MAX : 0, 
 		a.z == b.z ? UINT32_MAX : 0, a.w == b.w ? UINT32_MAX : 0);
 }
+/**
+ * @brief Compares two vectors component wise, yielding UINT32_MAX where components differ and 0 otherwise.
+ *
+ * @param a first vector to compare
+ * @param b second vector to compare
+ */
+static uint4 notEqual(uint4 a, uint4 b) noexcept
+{
+	return uint4(a.x != b.x ? UINT32_MAX : 0, a.y != b.y ? UINT32_MAX : 0, 
+		a.z != b.z ? UINT32_MAX : 0, a.w != b.w ? UINT32_MAX : 0);
+}
+
 /**
  * @brief Returns true if first vector binary representation is less than the second.
  *
diff --git a/source/aabb.cpp b/source/aabb.cpp
index b00676d..0f6736b 100644
--- a/source/aabb.cpp
+++ b/source/aabb.cpp
@@ -15,191 +15,79 @@
 #include "math/aabb.hpp"
 
 //**********************************************************************************************************************
-math::float2 math::raycast2(const Aabb& aabb, const Ray& ray) noexcept
-{/*
-	auto origin = ray.origin;
-	auto invDirection = float3::one / ray.getDirection();
-	auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax();
-
-	auto t1 = (aabbMin.x - origin.x) * invDirection.x;
-	auto t2 = (aabbMax.x - origin.x) * invDirection.x;
-	auto t3 = (aabbMin.y - origin.y) * invDirection.y;
-	auto t4 = (aabbMax.y - origin.y) * invDirection.y;
-	auto t5 = (aabbMin.z - origin.z) * invDirection.z;
-	auto t6 = (aabbMax.z - origin.z) * invDirection.z;
-
-	auto tMin = std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6));
-	auto tMax = std::min(std::min(std::max(t1, t2), std::max(t3, t4)), std::max(t5, t6));
-	return float2(tMin, tMax);
-	*/
-	return float2::zero;
-}
-math::float2 math::raycast2I(const Aabb& aabb, const Ray& ray) noexcept
-{
-	/*
-	auto origin = ray.origin;
-	auto invDirection = ray.getDirection();
-	auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax();
-
-	auto t1 = (aabbMin.x - origin.x) * invDirection.x;
-	auto t2 = (aabbMax.x - origin.x) * invDirection.x;
-	auto t3 = (aabbMin.y - origin.y) * invDirection.y;
-	auto t4 = (aabbMax.y - origin.y) * invDirection.y;
-	auto t5 = (aabbMin.z - origin.z) * invDirection.z;
-	auto t6 = (aabbMax.z - origin.z) * invDirection.z;
-
-	auto tMin = std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6));
-	auto tMax = std::min(std::min(std::max(t1, t2), std::max(t3, t4)), std::max(t5, t6));
-	return float2(tMin, tMax);
-	*/
-	return float2::zero;
-}
-
-float math::raycast1(const Aabb& aabb, const Ray& ray) noexcept
-{
-	/*
-	auto origin = ray.origin;
-	auto invDirection = float3::one / ray.getDirection();
-	auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax();
-
-	auto t1 = (aabbMin.x - origin.x) * invDirection.x;
-	auto t2 = (aabbMax.x - origin.x) * invDirection.x;
-	auto t3 = (aabbMin.y - origin.y) * invDirection.y;
-	auto t4 = (aabbMax.y - origin.y) * invDirection.y;
-	auto t5 = (aabbMin.z - origin.z) * invDirection.z;
-	auto t6 = (aabbMax.z - origin.z) * invDirection.z;
-
-	return std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6));
-	*/
-	return {};
-}
-float math::raycast1I(const Aabb& aabb, const Ray& ray) noexcept
-{
-	/*
-	auto origin = ray.origin;
-	auto invDirection = ray.getDirection();
-	auto aabbMin = aabb.getMin(), aabbMax = aabb.getMax();
-
-	auto t1 = (aabbMin.x - origin.x) * invDirection.x;
-	auto t2 = (aabbMax.x - origin.x) * invDirection.x;
-	auto t3 = (aabbMin.y - origin.y) * invDirection.y;
-	auto t4 = (aabbMax.y - origin.y) * invDirection.y;
-	auto t5 = (aabbMin.z - origin.z) * invDirection.z;
-	auto t6 = (aabbMax.z - origin.z) * invDirection.z;
-
-	return std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6));
-	*/
-	return {};
-}
-
-//**********************************************************************************************************************
-bool math::isAabbIntersected(const float3& center, float3 extent, const Triangle& triangle) noexcept
+bool math::isAabbIntersected(simd_f32_4 position, simd_f32_4 extent, const Triangle& triangle) noexcept
 {
-	/*
-	auto v0 = triangle.p0 - center;
-	auto v1 = triangle.p1 - center;
-	auto v2 = triangle.p2 - center;
-	auto f0 = v1 - v0, f1 = v2 - v1, f2 = v0 - v2;
-
-	auto a = float3(0.0f, -f0.z, f0.y);
-	auto p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	auto r = extent.y * std::abs(f0.z) + extent.z * std::abs(f0.y);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
-		return false;
-
-	a = float3(0.0f, -f1.z, f1.y);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.y * std::abs(f1.z) + extent.z * std::abs(f1.y);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	auto v = simd_f32_4x4(triangle.p0 - position, 
+		triangle.p1 - position, triangle.p2 - position, simd_f32_4::zero);
+	auto f0 = v.c1 - v.c0, f1 = v.c2 - v.c1, f2 = v.c0 - v.c2;
+	auto f0X = f0.getX(), f0Y = f0.getY(), f0Z = f0.getZ();
+	auto f1X = f1.getX(), f1Y = f1.getY(), f1Z = f1.getZ();
+	auto f2X = f2.getX(), f2Y = f2.getY(), f2Z = f2.getZ();
+	auto extX = extent.getX(), extY = extent.getY(), extZ = extent.getZ();
+
+	auto a = simd_f32_4(0.0f, -f0Z, f0Y);
+	auto p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	auto r = extY * std::abs(f0Z) + extZ * std::abs(f0Y);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(0.0f, -f2.z, f2.y);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.y * std::abs(f2.z) + extent.z * std::abs(f2.y);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(0.0f, -f1Z, f1Y);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extY * std::abs(f1Z) + extZ * std::abs(f1Y);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(f0.z, 0.0f, -f0.x);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f0.z) + extent.z * std::abs(f0.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(0.0f, -f2Z, f2Y);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extY * std::abs(f2Z) + extZ * std::abs(f2Y);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(f1.z, 0.0f, -f1.x);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f1.z) + extent.z * std::abs(f1.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(f0Z, 0.0f, -f0X);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f0Z) + extZ * std::abs(f0X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(f2.z, 0.0f, -f2.x);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f2.z) + extent.z * std::abs(f2.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(f1Z, 0.0f, -f1X);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f1Z) + extZ * std::abs(f1X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(-f0.y, f0.x, 0.0f);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f0.y) + extent.y * std::abs(f0.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(f2Z, 0.0f, -f2X);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f2Z) + extZ * std::abs(f2X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(-f1.y, f1.x, 0.0f);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f1.y) + extent.y * std::abs(f1.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(-f0Y, f0X, 0.0f);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f0Y) + extY * std::abs(f0X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	a = float3(-f2.y, f2.x, 0.0f);
-	p0 = dot(v0, a), p1 = dot(v1, a), p2 = dot(v2, a);
-	r = extent.x * std::abs(f2.y) + extent.y * std::abs(f2.x);
-	if (std::max(-max(p0, p1, p2), min(p0, p1, p2)) > r)
+	a = simd_f32_4(-f1Y, f1X, 0.0f);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f1Y) + extY * std::abs(f1X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
 
-	if (max(v0.x, v1.x, v2.x) < -extent.x || min(v0.x, v1.x, v2.x) > extent.x)
-		return false;
-	if (max(v0.y, v1.y, v2.y) < -extent.y || min(v0.y, v1.y, v2.y) > extent.y)
+	a = simd_f32_4(-f2Y, f2X, 0.0f);
+	p = simd_f32_4(dot3(v.c0, a), dot3(v.c1, a), dot3(v.c2, a));
+	r = extX * std::abs(f2Y) + extY * std::abs(f2X);
+	if (std::max(-max3(p), min3(p)) > r)
 		return false;
-	if (max(v0.z, v1.z, v2.z) < -extent.z || min(v0.z, v1.z, v2.z) > extent.z)
-		return false;
-
-	auto normal = cross(f0, f1);
-	auto distance = std::abs(dot(normal, v0));
-	r = extent.x * std::abs(normal.x) + extent.y * std::abs(normal.y) + extent.z * std::abs(normal.z);
-	return distance <= r;
-	*/
-	return {};
-}
 
-//**********************************************************************************************************************
-bool math::isBehindFrustum(const Aabb& aabb, const float4x4& model, const Plane* planes, uint8 planeCount) noexcept
-{
-	/*
-	auto min = aabb.getMin(), max = aabb.getMax();
-	auto v0 = model * float4(min, 1.0f);
-	auto v1 = model * float4(min.x, min.y, max.z, 1.0f);
-	auto v2 = model * float4(min.x, max.y, min.z, 1.0f);
-	auto v3 = model * float4(min.x, max.y, max.z, 1.0f);
-	auto v4 = model * float4(max.x, min.y, min.z, 1.0f);
-	auto v5 = model * float4(max.x, min.y, max.z, 1.0f);
-	auto v6 = model * float4(max.x, max.y, min.z, 1.0f);
-	auto v7 = model * float4(max, 1.0f);
-
-	for (uint8 i = 0; i < planeCount; i++)
+	auto t = transpose3x3(v);
+	if (max3(t.c0) < -extX || min3(t.c0) > extX || max3(t.c1) < -extY || 
+		min3(t.c1) > extY || max3(t.c2) < -extZ || min3(t.c2) > extZ)
 	{
-		auto plane = planes[i];
-		auto distance0 = distance(plane, v0), distance1 = distance(plane, v1);
-		auto distance2 = distance(plane, v2), distance3 = distance(plane, v3);
-		auto distance4 = distance(plane, v4), distance5 = distance(plane, v5);
-		auto distance6 = distance(plane, v6), distance7 = distance(plane, v7);
-
-		if (distance0 < 0.0f && distance1 < 0.0f && distance2 < 0.0f && distance3 < 0.0f &&
-			distance4 < 0.0f && distance5 < 0.0f && distance6 < 0.0f && distance7 < 0.0f)
-		{
-			return true;
-		}
+		return false;
 	}
 
-	return false;
-	*/
-	return {};
+	auto normal = cross3(f0, f1);
+	auto distance = std::abs(dot3(normal, v.c0));
+	r = extX * std::abs(normal.getX()) + extY * std::abs(normal.getY()) + extZ * std::abs(normal.getZ());
+	return distance <= r;
 }
\ No newline at end of file
diff --git a/source/bvh.cpp b/source/bvh.cpp
index 327b60f..b982637 100644
--- a/source/bvh.cpp
+++ b/source/bvh.cpp
@@ -20,39 +20,40 @@
 #include <cfloat>
 #include <functional>
 
+// TODO: Utilize Jolt BVH building and update optimizations.
+
 using namespace math;
 
 //**********************************************************************************************************************
 static Aabb calculateNodeAabb(const Bvh::Node* node, const uint8* vertices, const uint8* indices,
 	const uint32* primitives, uint32 vertexSize, uint32 indexSize, const function<uint32(const uint8*)>& getIndex)
 {
-	auto lastPrimitive = node->leaf.firstPrimitive + node->base.primitiveCount;
-	float3 min = float3(FLT_MAX), max = float3(-FLT_MAX);
-	for (uint32 i = node->leaf.firstPrimitive; i < lastPrimitive; i++)
+	auto lastPrimitive = node->getFirstPrimitive() + node->getPrimitiveCount();
+	simd_f32_4 min = simd_f32_4::max, max = simd_f32_4::minusMax;
+	for (uint32 i = node->getFirstPrimitive(); i < lastPrimitive; i++)
 	{
 		auto offset = primitives[i] * Triangle::pointCount;
 		auto i0 = getIndex(indices + (offset    ) * indexSize);
 		auto i1 = getIndex(indices + (offset + 1) * indexSize);
 		auto i2 = getIndex(indices + (offset + 2) * indexSize);
-		auto v0 = *(const float3*)(vertices + i0 * vertexSize);
-		auto v1 = *(const float3*)(vertices + i1 * vertexSize);
-		auto v2 = *(const float3*)(vertices + i2 * vertexSize);
-		min = ::min(min, v0); max = ::max(max, v0);
-		min = ::min(min, v1); max = ::max(max, v1);
-		min = ::min(min, v2); max = ::max(max, v2);
+		auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize));
+		auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize));
+		auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize));
+		min = math::min(math::min(math::min(min, v0), v1), v2); 
+		max = math::max(math::max(math::max(max, v0), v1), v2);
 	}
 	return Aabb(min, max);
 }
 
 static Aabb calculateNodeAabb(const Bvh::Node* node, const Aabb* aabbs, const uint32* primitives)
 {
-	auto lastPrimitive = node->leaf.firstPrimitive + node->base.primitiveCount;
-	float3 min = float3(FLT_MAX), max = float3(-FLT_MAX);
-	for (uint32 i = node->leaf.firstPrimitive; i < lastPrimitive; i++)
+	auto lastPrimitive = node->getFirstPrimitive() + node->getPrimitiveCount();
+	simd_f32_4 min = simd_f32_4::max, max = simd_f32_4::minusMax;
+	for (uint32 i = node->getFirstPrimitive(); i < lastPrimitive; i++)
 	{
 		auto aabb = aabbs[primitives[i]];
-		min = ::min(min, aabb.getMin());
-		max = ::max(max, aabb.getMax());
+		min = math::min(min, aabb.getMin());
+		max = math::max(max, aabb.getMax());
 	}
 	return Aabb(min, max);
 }
@@ -60,14 +61,23 @@ static Aabb calculateNodeAabb(const Bvh::Node* node, const Aabb* aabbs, const ui
 //**********************************************************************************************************************
 #define BIN_COUNT 8
 
+namespace
+{
+	struct Bin final // Bounds accumulated for one bin by the findBestSplitPlane() overloads.
+	{ // NOTE(review): those overloads also count primitives in max.ints.w; confirm that lane starts at 0 and survives max().
+		simd_f32_4 min = simd_f32_4::max; // running minimum corner, seeded with simd_f32_4::max
+		simd_f32_4 max = simd_f32_4::minusMax; // running maximum corner, seeded with simd_f32_4::minusMax
+	};
+}
+
 static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint32 vertexSize, uint32 indexSize,
-	const uint8* vertices, const uint8* indices, const uint32* primitives, const float3* centroids,
+	const uint8* vertices, const uint8* indices, const uint32* primitives, const simd_f32_4* centroids,
 	const function<uint32(const uint8*)>& getIndex, int32& axis, float& split, float& cost)
 {
 	auto bestAxis = 0;
 	auto bestSplit = 0.0f, bestCost = FLT_MAX;
 
-	for (uint8 a = 0; a < 3; a++)
+	for (int a = 0; a < 3; a++)
 	{
 		auto boundsMin = FLT_MAX, boundsMax = -FLT_MAX;
 		for (uint32 i = firstPrimitive; i < lastPrimitive; i++)
@@ -80,14 +90,9 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint
 		if (boundsMin == boundsMax)
 			continue;
 
-		struct Bin
-		{
-			float3 min = float3(FLT_MAX);
-			float3 max = float3(-FLT_MAX);
-			int32 primitiveCount = 0;
-		} bins[BIN_COUNT] = {};
-
+		Bin bins[BIN_COUNT] = {};
 		auto scale = BIN_COUNT / (boundsMax - boundsMin);
+
 		for (uint32 i = firstPrimitive; i < lastPrimitive; i++)
 		{
 			auto index = primitives[i];
@@ -95,33 +100,32 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint
 			auto i0 = getIndex(indices + (offset    ) * indexSize);
 			auto i1 = getIndex(indices + (offset + 1) * indexSize);
 			auto i2 = getIndex(indices + (offset + 2) * indexSize);
-			auto v0 = *(const float3*)(vertices + i0 * vertexSize);
-			auto v1 = *(const float3*)(vertices + i1 * vertexSize);
-			auto v2 = *(const float3*)(vertices + i2 * vertexSize);
+			auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize));
+			auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize));
+			auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize));
 
 			auto binIndex = std::min((int32)(BIN_COUNT - 1), (int32)((
 				centroids[index][a] - boundsMin) * scale));
 			auto& bin = bins[binIndex];
-			bin.min = min(bin.min, v0); bin.max = max(bin.max, v0);
-			bin.min = min(bin.min, v1); bin.max = max(bin.max, v1);
-			bin.min = min(bin.min, v2); bin.max = max(bin.max, v2);
-			bin.primitiveCount++;
+			bin.min = min(min(min(bin.min, v0), v1), v2);
+			bin.max = max(max(max(bin.max, v0), v1), v2);
+			bin.max.ints.w++;
 		}
 
 		float leftArea[BIN_COUNT - 1], rightArea[BIN_COUNT - 1];
 		int32 leftCount[BIN_COUNT - 1], rightCount[BIN_COUNT - 1];
-		float3 leftMin = float3(FLT_MAX), leftMax = float3(-FLT_MAX);
-		float3 rightMin = float3(FLT_MAX), rightMax = float3(-FLT_MAX);
+		simd_f32_4 leftMin = simd_f32_4::max, leftMax = simd_f32_4::minusMax;
+		simd_f32_4 rightMin = simd_f32_4::max, rightMax = simd_f32_4::minusMax;
 		int32 leftSum = 0, rightSum = 0;
 
-		for (uint8 i = 0; i < BIN_COUNT - 1; i++)
+		for (int i = 0; i < BIN_COUNT - 1; i++)
 		{
-			leftSum += bins[i].primitiveCount;
+			leftSum += bins[i].max.ints.w;
 			leftCount[i] = leftSum;
 			leftMin = min(leftMin, bins[i].min);
 			leftMax = max(leftMax, bins[i].max);
 			leftArea[i] = Aabb(leftMin, leftMax).calcArea();
-			rightSum += bins[(BIN_COUNT - 1) - i].primitiveCount;
+			rightSum += bins[(BIN_COUNT - 1) - i].max.ints.w;
 			rightCount[(BIN_COUNT - 2) - i] = rightSum;
 			rightMin = min(rightMin, bins[(BIN_COUNT - 1) - i].min);
 			rightMax = max(rightMax, bins[(BIN_COUNT - 1) - i].max);
@@ -129,7 +133,7 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint
 		}
 
 		scale = (boundsMax - boundsMin) / BIN_COUNT;
-		for (uint8 i = 0; i < BIN_COUNT - 1; i++)
+		for (int i = 0; i < BIN_COUNT - 1; i++)
 		{
 			float planeCost = leftCount[i] * leftArea[i] +
 				rightCount[i] * rightArea[i];
@@ -149,12 +153,12 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, uint
 
 //**********************************************************************************************************************
 static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, const Aabb* aabbs,
-	const uint32* primitives, const float3* centroids, int32& axis, float& split, float& cost)
+	const uint32* primitives, const simd_f32_4* centroids, int32& axis, float& split, float& cost)
 {
 	auto bestAxis = 0;
 	auto bestSplit = 0.0f, bestCost = FLT_MAX;
 
-	for (uint8 a = 0; a < 3; a++)
+	for (int a = 0; a < 3; a++)
 	{
 		auto boundsMin = FLT_MAX, boundsMax = -FLT_MAX;
 		for (uint32 i = firstPrimitive; i < lastPrimitive; i++)
@@ -167,14 +171,9 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons
 		if (boundsMin == boundsMax)
 			continue;
 
-		struct Bin
-		{
-			float3 min = float3(FLT_MAX);
-			float3 max = float3(-FLT_MAX);
-			int32 primitiveCount = 0;
-		} bins[BIN_COUNT] = {};
-
+		Bin bins[BIN_COUNT] = {};
 		auto scale = BIN_COUNT / (boundsMax - boundsMin);
+
 		for (uint32 i = firstPrimitive; i < lastPrimitive; i++)
 		{
 			auto index = primitives[i];
@@ -184,24 +183,24 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons
 			auto& bin = bins[binIndex];
 			bin.min = min(bin.min, aabb.getMin());
 			bin.max = max(bin.max, aabb.getMax());
-			bin.primitiveCount++;
+			bin.max.ints.w++;
 		}
 
 		// TODO: share this function part with a previous one?
 		float leftArea[BIN_COUNT - 1], rightArea[BIN_COUNT - 1];
 		int32 leftCount[BIN_COUNT - 1], rightCount[BIN_COUNT - 1];
-		float3 leftMin = float3(FLT_MAX), leftMax = float3(-FLT_MAX);
-		float3 rightMin = float3(FLT_MAX), rightMax = float3(-FLT_MAX);
+		simd_f32_4 leftMin = simd_f32_4::max, leftMax = simd_f32_4::minusMax;
+		simd_f32_4 rightMin = simd_f32_4::max, rightMax = simd_f32_4::minusMax;
 		int32 leftSum = 0, rightSum = 0;
 
-		for (uint8 i = 0; i < BIN_COUNT - 1; i++)
+		for (int i = 0; i < BIN_COUNT - 1; i++)
 		{
-			leftSum += bins[i].primitiveCount;
+			leftSum += bins[i].max.ints.w;
 			leftCount[i] = leftSum;
 			leftMin = min(leftMin, bins[i].min);
 			leftMax = max(leftMax, bins[i].max);
 			leftArea[i] = Aabb(leftMin, leftMax).calcArea();
-			rightSum += bins[(BIN_COUNT - 1) - i].primitiveCount;
+			rightSum += bins[(BIN_COUNT - 1) - i].max.ints.w;
 			rightCount[(BIN_COUNT - 2) - i] = rightSum;
 			rightMin = min(rightMin, bins[(BIN_COUNT - 1) - i].min);
 			rightMax = max(rightMax, bins[(BIN_COUNT - 1) - i].max);
@@ -209,7 +208,7 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons
 		}
 
 		scale = (boundsMax - boundsMin) / BIN_COUNT;
-		for (uint8 i = 0; i < BIN_COUNT - 1; i++)
+		for (int i = 0; i < BIN_COUNT - 1; i++)
 		{
 			float planeCost = leftCount[i] * leftArea[i] +
 				rightCount[i] * rightArea[i];
@@ -229,17 +228,17 @@ static void findBestSplitPlane(uint32 firstPrimitive, uint32 lastPrimitive, cons
 
 //**********************************************************************************************************************
 Bvh::Bvh(const uint8* vertices, const uint8* indices, const Aabb& aabb,
-	uint32 indexCount, uint32 vertexSize, uint32 indexSize, const float3* _centroids)
+	uint32 indexCount, uint32 vertexSize, uint32 indexSize, const simd_f32_4* _centroids)
 {
 	recreate(vertices, indices, aabb, indexCount, vertexSize, indexSize, _centroids);
 }
-Bvh::Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* _centroids)
+Bvh::Bvh(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* _centroids)
 {
 	recreate(aabbs, aabb, aabbCount, _centroids);
 }
 
 void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb,
-	uint32 indexCount, uint32 vertexSize, uint32 indexSize, const float3* _centroids)
+	uint32 indexCount, uint32 vertexSize, uint32 indexSize, const simd_f32_4* _centroids)
 {
 	assert(vertices);
 	assert(indices);
@@ -262,7 +261,7 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb
 		getIndex = getIndex16;
 	else if (indexSize != sizeof(uint32)) abort();
 
-	const float3* centroidData;
+	const simd_f32_4* centroidData;
 	if (!_centroids)
 	{
 		centroids.resize(primitiveCount);
@@ -274,10 +273,10 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb
 			auto i0 = getIndex(indices + (offset    ) * indexSize);
 			auto i1 = getIndex(indices + (offset + 1) * indexSize);
 			auto i2 = getIndex(indices + (offset + 2) * indexSize);
-			auto v0 = (const float3*)(vertices + i0 * vertexSize);
-			auto v1 = (const float3*)(vertices + i1 * vertexSize);
-			auto v2 = (const float3*)(vertices + i2 * vertexSize);
-			centroids[i] = (*v0 + *v1 + *v2) * (1.0f / 3.0f);
+			auto v0 = simd_f32_4(*(const float3*)(vertices + i0 * vertexSize));
+			auto v1 = simd_f32_4(*(const float3*)(vertices + i1 * vertexSize));
+			auto v2 = simd_f32_4(*(const float3*)(vertices + i2 * vertexSize));
+			centroids[i] = (v0 + v1 + v2) * (1.0f / 3.0f);
 		}
 	}
 	else
@@ -287,21 +286,21 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb
 
 	auto nodeCount = (uint32)1;
 	auto node = &nodes[0];
-	node->setAabb(aabb);
-	node->leaf.primitiveCount = primitiveCount;
-	node->leaf.firstPrimitive = 0;
+	node->aabb = aabb;
+	node->setPrimitiveCount(primitiveCount);
+	node->setFirstPrimitive(0);
 
 	while (true)
 	{
-		auto firstPrimitive = node->leaf.firstPrimitive;
-		auto primitiveCount = node->leaf.primitiveCount;
+		auto firstPrimitive = node->getFirstPrimitive();
+		auto primitiveCount = node->getPrimitiveCount();
 		auto lastPrimitive = firstPrimitive + primitiveCount;
 
 		int32 axis; float split, cost;
 		findBestSplitPlane(firstPrimitive, lastPrimitive, vertexSize, indexSize,
 			vertices, indices, primitives.data(), centroidData, getIndex, axis, split, cost);
 
-		auto nodeCost = node->getAabb().calcArea() * primitiveCount;
+		auto nodeCost = node->aabb.calcArea() * primitiveCount;
 		if (cost >= nodeCost)
 		{
 			if (nodeStack.empty())
@@ -340,20 +339,20 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb
 		auto leftNodeID = nodeCount;
 		auto rightNodeID = nodeCount + 1;
 		nodeCount += 2;
-		node->child.leftNode = leftNodeID;
-		node->child.primitiveCount = 0;
+		node->setLeftNode(leftNodeID);
+		node->setPrimitiveCount(0);
 	
 		auto node1 = &nodes[leftNodeID];
-		node1->leaf.firstPrimitive = firstPrimitive;
-		node1->leaf.primitiveCount = count1;
-		node1->setAabb(calculateNodeAabb(node1, vertices,
-			indices, primitives.data(), vertexSize, indexSize, getIndex));
+		node1->setFirstPrimitive(firstPrimitive);
+		node1->setPrimitiveCount(count1);
+		node1->aabb = calculateNodeAabb(node1, vertices, indices, 
+			primitives.data(), vertexSize, indexSize, getIndex);
 
 		auto node2 = &nodes[rightNodeID];
-		node2->leaf.firstPrimitive = i;
-		node2->leaf.primitiveCount = count2;
-		node2->setAabb(calculateNodeAabb(node2, vertices,
-			indices, primitives.data(), vertexSize, indexSize, getIndex));
+		node2->setFirstPrimitive(i);
+		node2->setPrimitiveCount(count2);
+		node2->aabb = calculateNodeAabb(node2, vertices, indices, 
+			primitives.data(), vertexSize, indexSize, getIndex);
 
 		if (count2 > count1)
 		{
@@ -383,7 +382,7 @@ void Bvh::recreate(const uint8* vertices, const uint8* indices, const Aabb& aabb
 }
 
 //**********************************************************************************************************************
-void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const float3* _centroids)
+void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const simd_f32_4* _centroids)
 {
 	assert(aabbs);
 	assert(aabbCount > 0);
@@ -394,7 +393,7 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const
 	for (uint32 i = 0; i < aabbCount; i++)
 		primitiveData[i] = i;
 
-	const float3* centroidData;
+	const simd_f32_4* centroidData;
 	if (!_centroids)
 	{
 		centroids.resize(aabbCount);
@@ -414,21 +413,21 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const
 	auto nodeData = nodes.data();
 	auto nodeCount = (uint32)1;
 	auto node = &nodeData[0];
-	node->setAabb(aabb);
-	node->leaf.primitiveCount = aabbCount;
-	node->leaf.firstPrimitive = 0;
+	node->aabb = aabb;
+	node->setPrimitiveCount(aabbCount);
+	node->setFirstPrimitive(0);
 
 	while (true)
 	{
-		auto firstPrimitive = node->leaf.firstPrimitive;
-		auto primitiveCount = node->leaf.primitiveCount;
+		auto firstPrimitive = node->getFirstPrimitive();
+		auto primitiveCount = node->getPrimitiveCount();
 		auto lastPrimitive = firstPrimitive + primitiveCount;
 
 		int32 axis; float split, cost;
 		findBestSplitPlane(firstPrimitive, lastPrimitive, aabbs,
 			primitiveData, centroidData, axis, split, cost);
 
-		auto nodeCost = node->getAabb().calcArea() * primitiveCount;
+		auto nodeCost = node->aabb.calcArea() * primitiveCount;
 		if (cost >= nodeCost)
 		{
 			if (nodeStack.empty())
@@ -467,18 +466,18 @@ void Bvh::recreate(const Aabb* aabbs, const Aabb& aabb, uint32 aabbCount, const
 		auto leftNodeID = nodeCount;
 		auto rightNodeID = nodeCount + 1;
 		nodeCount += 2;
-		node->child.leftNode = leftNodeID;
-		node->child.primitiveCount = 0;
+		node->setLeftNode(leftNodeID);
+		node->setPrimitiveCount(0);
 	
 		auto node1 = &nodeData[leftNodeID];
-		node1->leaf.firstPrimitive = firstPrimitive;
-		node1->leaf.primitiveCount = count1;
-		node1->setAabb(calculateNodeAabb(node1, aabbs, primitiveData));
+		node1->setFirstPrimitive(firstPrimitive);
+		node1->setPrimitiveCount(count1);
+		node1->aabb = calculateNodeAabb(node1, aabbs, primitiveData);
 
 		auto node2 = &nodeData[rightNodeID];
-		node2->leaf.firstPrimitive = i;
-		node2->leaf.primitiveCount = count2;
-		node2->setAabb(calculateNodeAabb(node2, aabbs, primitiveData));
+		node2->setFirstPrimitive(i);
+		node2->setPrimitiveCount(count2);
+		node2->aabb = calculateNodeAabb(node2, aabbs, primitiveData);
 
 		if (count2 > count1)
 		{
diff --git a/tests/test-containers.cpp b/tests/test-containers.cpp
new file mode 100644
index 0000000..115e7a3
--- /dev/null
+++ b/tests/test-containers.cpp
@@ -0,0 +1,48 @@
+// Copyright 2022-2025 Nikita Fediuchin. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "math/angles.hpp"
+#include "math/sphere.hpp"
+#include "math/matrix/projection.hpp"
+#include <cstdlib>
+#include <stdexcept>
+
+using namespace math;
+
+/**
+ * @brief Checks plane construction and runs the frustum plane helpers.
+ * @throw runtime_error if the constructed plane reports a non-unit normal.
+ */
+static void testPlanes()
+{
+	// (1, 2, 3) is not unit length, so this verifies that the plane hands back a normalized normal.
+	auto plane = Plane(simd_f32_4(1.0f, 2.0f, 3.0f), 12.3f);
+	if (!isNormalized3(plane.getNormal()))
+		throw runtime_error("Plane is not normalized.");
+
+	// Smoke test only: the extracted and normalized frustum planes are not inspected afterwards.
+	Plane frustumPlanes[Plane::frustumCount];
+	auto viewProj = calcPerspProjInfRevZ(radians(90.0f), 16.0f / 9.0f, 0.01f);
+	extractFrustumPlanes(simd_f32_4x4(viewProj), frustumPlanes);
+	normalizePlanes(frustumPlanes, Plane::frustumCount);
+}
+
+// TODO: test other containers.
+
+// Runs every container test; a failed check leaves main as an uncaught exception.
+int main()
+{
+	testPlanes();
+	return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/tests/test-matrix.cpp b/tests/test-matrix.cpp
index 8008ecf..ccfa7ea 100644
--- a/tests/test-matrix.cpp
+++ b/tests/test-matrix.cpp
@@ -12,26 +12,72 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "math/matrix.hpp"
+#include "math/matrix/transform.hpp"
+#include "math/angles.hpp"
 #include <stdexcept>
 
 using namespace math;
 
-static void cmp(simd_f32_4x4 a, float4x4 b)
+static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f)
+{
+	for (int i = 0; i < 4; i++)
+	{
+		if (isnan(a[i]))
+			a[i] = 0.0f;
+		if (isnan(b[i]))
+			b[i] = 0.0f;
+	}
+
+	auto difference = distanceSq((float4)a, b);
+	if (difference > tolerance)
+		throw runtime_error("Float4 vectors test failed.");
+}
+static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f)
+{
+	for (int i = 0; i < 3; i++)
+	{
+		if (isnan(a[i]))
+			a[i] = 0.0f;
+		if (isnan(b[i]))
+			b[i] = 0.0f;
+	}
+
+	auto difference = distanceSq((float3)a, b);
+	if (difference > tolerance)
+		throw runtime_error("Float3 vectors test failed.");
+}
+static void cmp(simd_f32_4x4 a, float4x4 b, float tolerance = 1.0e-9f)
 {
 	for (int c = 0; c < 4; c++)
 	{
 		for (int r = 0; r < 4; r++)
 		{
-			if (isnan(a.columns[c][r]))
-				a.columns[c][r] = 0.0f;
+			if (isnan(a[c][r]))
+				a[c][r] = 0.0f;
+			if (isnan(b[c][r]))
+				b[c][r] = 0.0f;
+		}
+
+		auto difference = distanceSq((float4)a[c], b[c]);
+		if (difference > tolerance)
+			throw runtime_error("Float4 matrices test failed.");
+	}
+}
+static void cmp(simd_f32_4x4 a, float3x3 b, float tolerance = 1.0e-9f) // Checks the upper-left 3x3 part only.
+{
+	for (int c = 0; c < 3; c++)
+	{
+		for (int r = 0; r < 3; r++)
+		{
+			if (isnan(a[c][r]))
+				a[c][r] = 0.0f;
 			if (isnan(b[c][r]))
 				b[c][r] = 0.0f;
 		}
 
-		auto difference = distanceSq((float4)a.columns[c], b[c]);
-		if (difference > 1.0e-10f)
-			throw runtime_error("Float matrices test failed.");
+		auto difference = distanceSq((float3)a[c], b[c]);
+		if (difference > tolerance)
+			throw runtime_error("Float3 matrices test failed.");
 	}
 }
 static void cmp(int a, int b)
@@ -40,23 +86,25 @@ static void cmp(int a, int b)
 		throw runtime_error("Int test failed.");
 }
 
-int main()
-{
-	const auto matLeft = float4x4(
-		0.0f, 2.3f, 4.56f, 78.9f,
-		1.0f, 0.023f, 4.5f, 6789.0f,
-		0.01f, 0.0f, 23.456f, 7.089f,
-		1.234f, 5.0006f, 78.09f, 0.0f);
-	const auto matRight = float4x4(
-		1.0f, -23.0f, 4.56f, -0.0789,
-		10.023f, -4.0f, -56.708f, 0.009f,
-		-100.0f, 23.0405f, 0.0678f, -0.9f,
-		1.234f, 6789.0f, 500.0f, 1.0f);
-	auto simdLeft = simd_f32_4x4(matLeft), simdRight = simd_f32_4x4(matRight);
+static constexpr auto matLeft = float4x4(
+	0.0f, 2.3f, 4.56f, 78.9f,
+	1.0f, 0.023f, 4.5f, 6789.0f,
+	0.01f, 0.0f, 23.456f, 7.089f,
+	1.234f, 5.0006f, 78.09f, 0.0f);
+static constexpr auto matRight = float4x4(
+	1.0f, -23.0f, 4.56f, -0.0789,
+	10.023f, -4.0f, -56.708f, 0.009f,
+	-100.0f, 23.0405f, 0.0678f, -0.9f,
+	1.234f, 6789.0f, 500.0f, 1.0f);
+static const auto simdLeft = simd_f32_4x4(matLeft), simdRight = simd_f32_4x4(matRight);
 
+static void testFloatMatrices()
+{
 	cmp(simdLeft, matLeft);
 	cmp(simdRight, matRight);
 	cmp(simdLeft * simdRight, matLeft * matRight);
+	cmp(simdLeft * simdRight.c0, matLeft * matRight.c0);
+	cmp(simdRight * simdRight.c0, matRight * matRight.c0);
 	cmp(simdRight + 123.4f, matRight + 123.4f);
 	cmp(simdRight - 123.4f, matRight - 123.4f);
 	cmp(simdRight * 123.4f, matRight * 123.4f);
@@ -65,6 +113,60 @@ int main()
 	cmp(simdRight == simdRight, true);
 	cmp(simdLeft != simdRight, true);
 	cmp(simdRight == simdLeft, false);
-	
+	cmp(multiply3x3(simdLeft, simdRight.c0), (float3x3)matLeft * (float3)matRight.c0);
+	cmp(multiply3x3(simdRight, simdRight.c0), (float3x3)matRight * (float3)matRight.c0);
+	cmp(transpose4x4(simdLeft), transpose(matLeft));
+	cmp(transpose3x3(simdRight), transpose((float3x3)matRight));
+	cmp(inverse4x4(simdLeft), inverse(matLeft));
+}
+
+// Checks the SIMD translate/scale/rotate helpers against the scalar implementations.
+static void testMatrixTransforms()
+{
+	{
+		// Translation matrices.
+		const auto vecTrans = float3(0.0f, 1.23f, -45.067f);
+		const auto simdTrans = simd_f32_4(vecTrans);
+		auto mat = translate(vecTrans);
+		auto simd = translate(simdTrans);
+		cmp(simd, mat);
+		cmp(getTranslation(simd), getTranslation(mat));
+		cmp(translate(simd, simdTrans), translate(mat, vecTrans));
+		cmp(translate(simdTrans, simd), translate(vecTrans, mat));
+		cmp(inverseTransRot(simd), inverseTransRot(mat));
+	}
+	{
+		// Scale matrices.
+		const auto vecScale = float3(1.0f, 0.5f, 0.01f);
+		const auto simdScale = simd_f32_4(vecScale);
+		auto mat = scale(vecScale);
+		auto simd = scale(simdScale);
+		cmp(extractScale(simd), extractScale(mat));
+		cmp(scale(simd, simdScale), scale(mat, vecScale));
+		cmp(scale(simdScale, simd), scale(vecScale, mat));
+	}
+	{
+		// Rotation matrices built from the same Euler angles.
+		const auto rot = radians(float3(-1.234f, 30.0f, -90.0f));
+		const auto vecRot = quat(rot);
+		const auto simdRot = fromEulerAngles(simd_f32_4(rot));
+		auto mat = rotate(vecRot);
+		auto simd = rotate(simdRot);
+		cmp(extractRotation(simd), extractRotation(mat));
+		cmp(extractRotationOnly(simd), extractRotationOnly(mat));
+		cmp(extractQuat(simd), extractQuat(mat));
+		cmp(simd * rotate(simdRot), mat * rotate(vecRot));
+		cmp(rotate(simdRot) * simd, rotate(vecRot) * mat);
+		cmp(inverseTransRot(simd), inverseTransRot(mat));
+
+		// TODO: lookAt
+	}
+}
+
+// Runs every matrix test; a failed check leaves main as an uncaught exception.
+int main()
+{
+	testFloatMatrices();
+	testMatrixTransforms();
 	return EXIT_SUCCESS;
 }
\ No newline at end of file
diff --git a/tests/test-quaternion.cpp b/tests/test-quaternion.cpp
new file mode 100644
index 0000000..bb32c62
--- /dev/null
+++ b/tests/test-quaternion.cpp
@@ -0,0 +1,96 @@
+// Copyright 2022-2025 Nikita Fediuchin. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "math/quaternion.hpp"
+#include "math/angles.hpp"
+
+#include <cstdlib>
+#include <stdexcept>
+#include <iostream>
+
+using namespace math;
+
+// Checks a SIMD vector against a scalar float4 reference, throwing runtime_error on mismatch.
+// NaN components are zeroed on both sides first; tolerance bounds the squared distance.
+static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f)
+{
+	for (int i = 0; i < 4; i++)
+	{
+		if (isnan(a[i]))
+			a[i] = 0.0f;
+		if (isnan(b[i]))
+			b[i] = 0.0f;
+	}
+
+	auto difference = distanceSq((float4)a, b);
+	if (difference > tolerance)
+		throw runtime_error("Float4 vectors test failed.");
+}
+// Checks a SIMD vector, converted to float3, against a scalar float3 reference.
+static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f)
+{
+	for (int i = 0; i < 3; i++)
+	{
+		if (isnan(a[i]))
+			a[i] = 0.0f;
+		if (isnan(b[i]))
+			b[i] = 0.0f;
+	}
+
+	auto difference = distanceSq((float3)a, b);
+	if (difference > tolerance)
+		throw runtime_error("Float3 vectors test failed.");
+}
+// Checks two scalars; tolerance bounds the squared difference, not the absolute one.
+static void cmp(float a, float b, float tolerance = 1.0e-9f)
+{
+	auto difference = (a - b) * (a - b);
+	if (difference > tolerance)
+		throw runtime_error("Float test failed.");
+}
+
+// Compares SIMD quaternion operations against the scalar quat implementation.
+static void testQuatVectors()
+{
+	const auto vecAngles = radians(float4(90.0f, -1.23f, 45.0f, 0.0f));
+	const auto simdAngles = simd_f32_4(vecAngles);
+	const auto vecQuat = quat((float3)vecAngles);
+	const auto simdQuat = fromEulerAngles(simdAngles);
+	cmp(simdQuat, vecQuat);
+	cmp(simd_f32_quat(vecAngles.x, simd_f32_4(1.0f, 0.0f, 0.0f, 0.0f)), quat(vecAngles.x, float3::right));
+	cmp(simd_f32_quat(vecAngles.y, simd_f32_4(0.0f, -1.0f, 0.0f, 0.0f)), quat(vecAngles.y, float3::bottom));
+	cmp(simdQuat.extractEulerAngles(), vecQuat.extractEulerAngles());
+	cmp(simdQuat.extractPitch(), vecQuat.extractPitch());
+	cmp(simdQuat.extractYaw(), vecQuat.extractYaw());
+	cmp(simdQuat.extractRoll(), vecQuat.extractRoll());
+	cmp(conjugate(simdQuat), conjugate(vecQuat));
+	cmp(inverse(simdQuat), inverse(vecQuat));
+	cmp(slerp(simdQuat, simdQuat * simdQuat, 0.75f),
+		slerp(vecQuat, vecQuat * vecQuat, 0.75f));
+	cmp(simdQuat * simdQuat, vecQuat * vecQuat);
+	{
+		// Quaternion-by-vector products in both operand orders.
+		const auto vec = float3(1.0f, 2.34f, -0.5678f);
+		const auto simd = simd_f32_4(vec);
+		cmp(simdQuat * simd, vecQuat * vec);
+		cmp(simd * simdQuat, vec * vecQuat);
+	}
+}
+
+// Runs every quaternion test; a failed check leaves main as an uncaught exception.
+int main()
+{
+	testQuatVectors();
+	return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/tests/test-vector.cpp b/tests/test-vector.cpp
index 092d548..d0fd715 100644
--- a/tests/test-vector.cpp
+++ b/tests/test-vector.cpp
@@ -18,34 +18,48 @@
 
 using namespace math;
 
-static void cmp(simd_f32_4 a, float4 b)
+static void cmp(simd_f32_4 a, float4 b, float tolerance = 1.0e-9f)
 {
 	for (int i = 0; i < 4; i++)
 	{
-		if (isnan(a.vec[i]))
-			a.vec[i] = 0.0f;
+		if (isnan(a[i]))
+			a[i] = 0.0f;
 		if (isnan(b[i]))
 			b[i] = 0.0f;
 	}
 
 	auto difference = distanceSq((float4)a, b);
-	if (difference > 1.0e-10f)
-		throw runtime_error("Float vectors test failed.");
+	if (difference > tolerance)
+		throw runtime_error("Float4 vectors test failed.");
+}
+static void cmp(simd_f32_4 a, float3 b, float tolerance = 1.0e-9f)
+{
+	for (int i = 0; i < 3; i++)
+	{
+		if (isnan(a[i]))
+			a[i] = 0.0f;
+		if (isnan(b[i]))
+			b[i] = 0.0f;
+	}
+
+	auto difference = distanceSq((float3)a, b);
+	if (difference > tolerance)
+		throw runtime_error("Float3 vectors test failed.");
 }
 static void cmp(simd_i32_4 a, int4 b)
 {
 	if ((int4)a != b)
-		throw runtime_error("Float vectors test failed.");
+		throw runtime_error("Int4 vectors test failed.");
 }
 static void cmp(simd_u32_4 a, uint4 b)
 {
 	if ((uint4)a != b)
-		throw runtime_error("Uint vectors test failed.");
+		throw runtime_error("Uint4 vectors test failed.");
 }
-static void cmp(float a, float b)
+static void cmp(float a, float b, float tolerance = 1.0e-9f)
 {
 	auto difference = (a - b) * (a - b);
-	if (difference > 1.0e-12f)
+	if (difference > tolerance)
 		throw runtime_error("Float test failed.");
 }
 static void cmp(int a, int b)
@@ -74,12 +88,20 @@ static void testFloatVectors()
 	cmp(simdLeft > simdRight, vecLeft > vecRight);
 	cmp(simdLeft <= simdRight, vecLeft <= vecRight);
 	cmp(simdLeft >= simdRight, vecLeft >= vecRight);
-	cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft));
-	cmp(compare(simdRight, simdRight), compare(vecRight, vecRight));
-	cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight));
+	cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft));
+	cmp(equal(simdRight, simdRight), equal(vecRight, vecRight));
+	cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight));
+	cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft));
+	cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight));
+	cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight));
 	cmp(select(simd_u32_4(1, 0, 0, 1), simdLeft, simdRight), select(uint4(1, 0, 0, 1), vecLeft, vecRight));
+	cmp(select(simd_u32_4(0, 1, 0, 1), simdRight, simdLeft), select(uint4(0, 1, 0, 1), vecRight, vecLeft));
 	cmp(min(simdLeft, simdRight), min(vecLeft, vecRight));
 	cmp(max(simdLeft, simdRight), max(vecLeft, vecRight));
+	cmp(min3(simdLeft), min(vecLeft.x, vecLeft.y, vecLeft.z));
+	cmp(min3(simdRight), min(vecRight.x, vecRight.y, vecRight.z));
+	cmp(max3(simdLeft), max(vecLeft.x, vecLeft.y, vecLeft.z));
+	cmp(max3(simdRight), max(vecRight.x, vecRight.y, vecRight.z));
 	cmp(fma(simdLeft, simdRight, simdRight), fma(vecLeft, vecRight, vecRight));
 	cmp(abs(simdLeft), abs(vecLeft));
 	cmp(abs(simdRight), abs(vecRight));
@@ -101,21 +123,41 @@ static void testFloatVectors()
 	cmp(dot4(simdLeft, simdRight), dot(vecLeft, vecRight));
 	cmp(dotV3(simdLeft, simdRight), float4(dot((float3)vecLeft, (float3)vecRight)));
 	cmp(dot3(simdLeft, simdRight), dot((float3)vecLeft, (float3)vecRight));
-	{
-		auto c = cross((float3)vecLeft, (float3)vecRight);
-		cmp(cross3(simdLeft, simdRight), float4(c, c.z));
-	}
+	cmp(cross3(simdLeft, simdRight), cross((float3)vecLeft, (float3)vecRight));
 	cmp(length4(simdLeft), length(vecLeft));
 	cmp(length4(simdRight), length(vecRight));
 	cmp(length3(simdLeft), length((float3)vecLeft));
 	cmp(length3(simdRight), length((float3)vecRight));
 	cmp(normalize4(simdLeft), normalize(vecLeft));
 	cmp(normalize4(simdRight), normalize(vecRight));
+	cmp(normalize3(simdLeft), normalize((float3)vecLeft));
+	cmp(normalize3(simdRight), normalize((float3)vecRight));
+	cmp(isNormalized4(normalize4(simdLeft)), true);
+	cmp(isNormalized3(normalize3(simdRight)), true);
+	cmp(isNormalized(normalize(vecLeft)), true);
+	cmp(isNormalized(normalize((float3)vecRight)), true);
+	cmp(log(simdLeft), log(vecLeft));
+	cmp(log(simdRight), log(vecRight));
+	cmp(exp(simdLeft), exp(vecLeft));
+	cmp(exp(simdRight), exp(vecRight));
+	{
+		const auto vecPow = float4(2.0f, 1.0f, 0.5f, 0.0f);
+		const auto simdPow = simd_f32_4(vecPow);
+		cmp(mod(simdLeft, simdPow), mod(vecLeft, vecPow));
+		cmp(mod(simdRight, simdPow), mod(vecRight, vecPow));
+		cmp(pow(simdLeft, simdPow), pow(vecLeft, vecPow));
+		cmp(pow(simdRight, simdPow), pow(vecRight, vecPow));
+		cmp(fastPow(simdLeft, simdPow), pow(vecLeft, vecPow), 1.0e-6f);
+		cmp(fastPow(simdRight, simdPow), pow(vecRight, vecPow), 1.0e-6f);
+	}
 	{
-		auto c = normalize((float3)vecLeft);
-		cmp(normalize3(simdLeft), float4(c, c.z));
-		c = normalize((float3)vecRight);
-		cmp(normalize3(simdRight), float4(c, c.z));
+		simd_f32_4 simdSin, simdCos;
+		sinCos(simdLeft, simdSin, simdCos);
+		cmp(simdSin, sin(vecLeft));
+		cmp(simdCos, cos(vecLeft));
+		sinCos(simdRight, simdSin, simdCos);
+		cmp(simdSin, sin(vecRight));
+		cmp(simdCos, cos(vecRight));
 	}
 }
 static void testIntVectors()
@@ -150,10 +192,14 @@ static void testIntVectors()
 	cmp(simdLeft > simdRight, vecLeft > vecRight);
 	cmp(simdLeft <= simdRight, vecLeft <= vecRight);
 	cmp(simdLeft >= simdRight, vecLeft >= vecRight);
-	cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft));
-	cmp(compare(simdRight, simdRight), compare(vecRight, vecRight));
-	cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight));
+	cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft));
+	cmp(equal(simdRight, simdRight), equal(vecRight, vecRight));
+	cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight));
+	cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft));
+	cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight));
+	cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight));
 	cmp(select(simd_u32_4(1, 0, 0, 1), simdLeft, simdRight), select(uint4(1, 0, 0, 1), vecLeft, vecRight));
+	cmp(select(simd_u32_4(0, 1, 0, 1), simdRight, simdLeft), select(uint4(0, 1, 0, 1), vecRight, vecLeft));
 	cmp(min(simdLeft, simdRight), min(vecLeft, vecRight));
 	cmp(max(simdLeft, simdRight), max(vecLeft, vecRight));
 }
@@ -190,10 +236,14 @@ static void testUintVectors()
 	cmp(areAllTrue(simd_u32_4(UINT32_MAX)), areAllTrue(uint4(UINT32_MAX)));
 	cmp(areAllFalse(simd_u32_4(0u)), areAllTrue(uint4(0u)));
 	cmp(areAnyTrue(simd_u32_4(0, 1, 0, 0)), areAllTrue(uint4(0, 1, 0, 0)));
-	cmp(compare(simdLeft, simdLeft), compare(vecLeft, vecLeft));
-	cmp(compare(simdRight, simdRight), compare(vecRight, vecRight));
-	cmp(compare(simdLeft, simdRight), compare(vecLeft, vecRight));
+	cmp(equal(simdLeft, simdLeft), equal(vecLeft, vecLeft));
+	cmp(equal(simdRight, simdRight), equal(vecRight, vecRight));
+	cmp(equal(simdLeft, simdRight), equal(vecLeft, vecRight));
+	cmp(notEqual(simdLeft, simdLeft), notEqual(vecLeft, vecLeft));
+	cmp(notEqual(simdRight, simdRight), notEqual(vecRight, vecRight));
+	cmp(notEqual(simdLeft, simdRight), notEqual(vecLeft, vecRight));
 	cmp(select(simd_u32_4(1, 0, 1, 0), simdLeft, simdRight), select(uint4(1, 0, 1, 0), vecLeft, vecRight));
+	cmp(select(simd_u32_4(0, 1, 1, 0), simdRight, simdLeft), select(uint4(0, 1, 1, 0), vecRight, vecLeft));
 	cmp(min(simdLeft, simdRight), min(vecLeft, vecRight));
 	cmp(max(simdLeft, simdRight), max(vecLeft, vecRight));
 }