From 0bd6479f854e7a2faf8f36428099c2473e175b9d Mon Sep 17 00:00:00 2001
From: Bartosz Taudul <wolf@nereid.pl>
Date: Sat, 1 May 2021 17:30:34 +0200
Subject: [PATCH] Optimize IM_FIXNORMAL2F.

---
 imgui/imgui_draw.cpp   | 2 +-
 imgui/imgui_internal.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/imgui/imgui_draw.cpp b/imgui/imgui_draw.cpp
index a9d3149c..b5833c3f 100644
--- a/imgui/imgui_draw.cpp
+++ b/imgui/imgui_draw.cpp
@@ -695,7 +695,7 @@ void ImDrawList::PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, c
 // On AddPolyline() and AddConvexPolyFilled() we intentionally avoid using ImVec2 and superfluous function calls to optimize debug/non-inlined builds.
 // Those macros expects l-values.
 #define IM_NORMALIZE2F_OVER_ZERO(VX,VY)     do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = ImRsqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)
-#define IM_FIXNORMAL2F(VX,VY)               do { float d2 = VX*VX + VY*VY; if (d2 < 0.5f) d2 = 0.5f; float inv_lensq = 1.0f / d2; VX *= inv_lensq; VY *= inv_lensq; } while (0)
+#define IM_FIXNORMAL2F(VX,VY)               do { float d2 = VX*VX + VY*VY; if (d2 < 0.5f) d2 = 0.5f; float inv_lensq = ImRecip(d2); VX *= inv_lensq; VY *= inv_lensq; } while (0) // Scales (VX,VY) by 1/max(d2, 0.5); ImRecip() is an approximate reciprocal on SSE builds.
 
 // TODO: Thickness anti-aliased lines cap are missing their AA fringe.
 // We avoid using the ImVec2 math operators here to reduce cost to a minimum for debug/non-inlined builds.
diff --git a/imgui/imgui_internal.h b/imgui/imgui_internal.h
index a5503374..b012eedd 100644
--- a/imgui/imgui_internal.h
+++ b/imgui/imgui_internal.h
@@ -396,6 +396,12 @@ static inline float  ImRsqrt(float x)           { return _mm_cvtss_f32(_mm_rsqrt
 static inline float  ImRsqrt(float x)           { return 1.0f / sqrtf(x); }
 #endif
 static inline double ImRsqrt(double x)          { return 1.0 / sqrt(x); }
+#if defined __SSE__ || defined __x86_64__ || defined _M_X64
+static inline float  ImRecip(float x)           { return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(x))); } // Fast approximate 1/x via scalar SSE rcpss (relative error up to 1.5*2^-12); only the low lane is computed.
+#else
+static inline float  ImRecip(float x)           { return 1.0f / x; } // Fallback without SSE: plain division.
+#endif
+static inline double ImRecip(double x)          { return 1.0 / x; } // Double overload: always a plain division.
 #endif
 // - ImMin/ImMax/ImClamp/ImLerp/ImSwap are used by widgets which support variety of types: signed/unsigned int/long long float/double
 // (Exceptionally using templates here but we could also redefine them for those types)