
If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. Benchmarks: ``` ------------------------------------------------------------------------------------- Benchmark old new ------------------------------------------------------------------------------------- bm_lexicographical_compare<unsigned char>/1 1.17 ns 2.34 ns bm_lexicographical_compare<unsigned char>/2 1.64 ns 2.57 ns bm_lexicographical_compare<unsigned char>/3 2.23 ns 2.58 ns bm_lexicographical_compare<unsigned char>/4 2.82 ns 2.57 ns bm_lexicographical_compare<unsigned char>/5 3.34 ns 2.11 ns bm_lexicographical_compare<unsigned char>/6 3.94 ns 2.21 ns bm_lexicographical_compare<unsigned char>/7 4.56 ns 2.11 ns bm_lexicographical_compare<unsigned char>/8 5.25 ns 2.11 ns bm_lexicographical_compare<unsigned char>/16 9.88 ns 2.11 ns bm_lexicographical_compare<unsigned char>/64 38.9 ns 2.36 ns bm_lexicographical_compare<unsigned char>/512 317 ns 6.54 ns bm_lexicographical_compare<unsigned char>/4096 2517 ns 41.4 ns bm_lexicographical_compare<unsigned char>/32768 20052 ns 488 ns bm_lexicographical_compare<unsigned char>/262144 159579 ns 4409 ns bm_lexicographical_compare<unsigned char>/1048576 640456 ns 20342 ns bm_lexicographical_compare<signed char>/1 1.18 ns 2.37 ns bm_lexicographical_compare<signed char>/2 1.65 ns 2.60 ns bm_lexicographical_compare<signed char>/3 2.23 ns 2.83 ns bm_lexicographical_compare<signed char>/4 2.81 ns 3.06 ns bm_lexicographical_compare<signed char>/5 3.35 ns 3.30 ns 
bm_lexicographical_compare<signed char>/6 3.90 ns 3.99 ns bm_lexicographical_compare<signed char>/7 4.56 ns 3.78 ns bm_lexicographical_compare<signed char>/8 5.20 ns 4.02 ns bm_lexicographical_compare<signed char>/16 9.80 ns 6.21 ns bm_lexicographical_compare<signed char>/64 39.0 ns 3.16 ns bm_lexicographical_compare<signed char>/512 318 ns 7.58 ns bm_lexicographical_compare<signed char>/4096 2514 ns 47.4 ns bm_lexicographical_compare<signed char>/32768 20096 ns 504 ns bm_lexicographical_compare<signed char>/262144 156617 ns 4146 ns bm_lexicographical_compare<signed char>/1048576 624265 ns 19810 ns bm_lexicographical_compare<int>/1 1.15 ns 2.12 ns bm_lexicographical_compare<int>/2 1.60 ns 2.36 ns bm_lexicographical_compare<int>/3 2.21 ns 2.59 ns bm_lexicographical_compare<int>/4 2.74 ns 2.83 ns bm_lexicographical_compare<int>/5 3.26 ns 3.06 ns bm_lexicographical_compare<int>/6 3.81 ns 4.53 ns bm_lexicographical_compare<int>/7 4.41 ns 4.72 ns bm_lexicographical_compare<int>/8 5.08 ns 2.36 ns bm_lexicographical_compare<int>/16 9.54 ns 3.08 ns bm_lexicographical_compare<int>/64 37.8 ns 4.71 ns bm_lexicographical_compare<int>/512 309 ns 24.6 ns bm_lexicographical_compare<int>/4096 2422 ns 204 ns bm_lexicographical_compare<int>/32768 19362 ns 1947 ns bm_lexicographical_compare<int>/262144 155727 ns 19793 ns bm_lexicographical_compare<int>/1048576 623614 ns 80180 ns bm_ranges_lexicographical_compare<unsigned char>/1 1.07 ns 2.35 ns bm_ranges_lexicographical_compare<unsigned char>/2 1.72 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/3 2.46 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/4 3.17 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/5 3.86 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/6 4.55 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/7 5.25 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/8 5.95 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/16 11.7 ns 2.13 ns 
bm_ranges_lexicographical_compare<unsigned char>/64 45.5 ns 2.36 ns bm_ranges_lexicographical_compare<unsigned char>/512 366 ns 6.35 ns bm_ranges_lexicographical_compare<unsigned char>/4096 2886 ns 40.9 ns bm_ranges_lexicographical_compare<unsigned char>/32768 23054 ns 489 ns bm_ranges_lexicographical_compare<unsigned char>/262144 185302 ns 4339 ns bm_ranges_lexicographical_compare<unsigned char>/1048576 741576 ns 19430 ns bm_ranges_lexicographical_compare<signed char>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<signed char>/2 1.66 ns 2.35 ns bm_ranges_lexicographical_compare<signed char>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<signed char>/4 2.82 ns 2.82 ns bm_ranges_lexicographical_compare<signed char>/5 3.34 ns 3.06 ns bm_ranges_lexicographical_compare<signed char>/6 3.92 ns 3.99 ns bm_ranges_lexicographical_compare<signed char>/7 4.64 ns 4.10 ns bm_ranges_lexicographical_compare<signed char>/8 5.21 ns 4.61 ns bm_ranges_lexicographical_compare<signed char>/16 9.79 ns 7.42 ns bm_ranges_lexicographical_compare<signed char>/64 38.9 ns 2.93 ns bm_ranges_lexicographical_compare<signed char>/512 317 ns 7.31 ns bm_ranges_lexicographical_compare<signed char>/4096 2500 ns 47.5 ns bm_ranges_lexicographical_compare<signed char>/32768 19940 ns 496 ns bm_ranges_lexicographical_compare<signed char>/262144 159166 ns 4393 ns bm_ranges_lexicographical_compare<signed char>/1048576 638206 ns 19786 ns bm_ranges_lexicographical_compare<int>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<int>/2 1.64 ns 3.04 ns bm_ranges_lexicographical_compare<int>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<int>/4 2.81 ns 2.81 ns bm_ranges_lexicographical_compare<int>/5 3.35 ns 3.05 ns bm_ranges_lexicographical_compare<int>/6 3.94 ns 4.60 ns bm_ranges_lexicographical_compare<int>/7 4.60 ns 4.81 ns bm_ranges_lexicographical_compare<int>/8 5.19 ns 2.35 ns bm_ranges_lexicographical_compare<int>/16 9.85 ns 2.87 ns bm_ranges_lexicographical_compare<int>/64 38.9 ns 4.70 ns 
bm_ranges_lexicographical_compare<int>/512 318 ns 24.5 ns bm_ranges_lexicographical_compare<int>/4096 2494 ns 202 ns bm_ranges_lexicographical_compare<int>/32768 20000 ns 1939 ns bm_ranges_lexicographical_compare<int>/262144 160433 ns 19730 ns bm_ranges_lexicographical_compare<int>/1048576 642636 ns 80760 ns ```
174 lines
6.7 KiB
C++
174 lines
6.7 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___ALGORITHM_RANGES_MINMAX_H
|
|
#define _LIBCPP___ALGORITHM_RANGES_MINMAX_H
|
|
|
|
#include <__algorithm/min_max_result.h>
|
|
#include <__algorithm/minmax_element.h>
|
|
#include <__assert>
|
|
#include <__concepts/copyable.h>
|
|
#include <__concepts/same_as.h>
|
|
#include <__config>
|
|
#include <__functional/identity.h>
|
|
#include <__functional/invoke.h>
|
|
#include <__functional/ranges_operations.h>
|
|
#include <__iterator/concepts.h>
|
|
#include <__iterator/next.h>
|
|
#include <__iterator/projected.h>
|
|
#include <__ranges/access.h>
|
|
#include <__ranges/concepts.h>
|
|
#include <__type_traits/desugars_to.h>
|
|
#include <__type_traits/is_reference.h>
|
|
#include <__type_traits/is_trivially_copyable.h>
|
|
#include <__type_traits/remove_cvref.h>
|
|
#include <__utility/forward.h>
|
|
#include <__utility/move.h>
|
|
#include <__utility/pair.h>
|
|
#include <initializer_list>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
#if _LIBCPP_STD_VER >= 20
|
|
|
|
_LIBCPP_PUSH_MACROS
|
|
# include <__undef_macros>
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
namespace ranges {
|
|
template <class _T1>
|
|
using minmax_result = min_max_result<_T1>;
|
|
|
|
struct __minmax {
|
|
template <class _Type,
|
|
class _Proj = identity,
|
|
indirect_strict_weak_order<projected<const _Type*, _Proj>> _Comp = ranges::less>
|
|
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result<const _Type&>
|
|
operator()(_LIBCPP_LIFETIMEBOUND const _Type& __a,
|
|
_LIBCPP_LIFETIMEBOUND const _Type& __b,
|
|
_Comp __comp = {},
|
|
_Proj __proj = {}) const {
|
|
if (std::invoke(__comp, std::invoke(__proj, __b), std::invoke(__proj, __a)))
|
|
return {__b, __a};
|
|
return {__a, __b};
|
|
}
|
|
|
|
template <copyable _Type,
|
|
class _Proj = identity,
|
|
indirect_strict_weak_order<projected<const _Type*, _Proj>> _Comp = ranges::less>
|
|
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result<_Type>
|
|
operator()(initializer_list<_Type> __il, _Comp __comp = {}, _Proj __proj = {}) const {
|
|
_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
|
|
__il.begin() != __il.end(), "initializer_list has to contain at least one element");
|
|
auto __iters = std::__minmax_element_impl(__il.begin(), __il.end(), __comp, __proj);
|
|
return ranges::minmax_result<_Type>{*__iters.first, *__iters.second};
|
|
}
|
|
|
|
template <input_range _Range,
|
|
class _Proj = identity,
|
|
indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less>
|
|
requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
|
|
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result<range_value_t<_Range>>
|
|
operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
|
|
auto __first = ranges::begin(__r);
|
|
auto __last = ranges::end(__r);
|
|
using _ValueT = range_value_t<_Range>;
|
|
|
|
_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__first != __last, "range has to contain at least one element");
|
|
|
|
// This optimiation is not in minmax_element because clang doesn't see through the pointers and as a result doesn't
|
|
// vectorize the code.
|
|
if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
|
|
__is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value &&
|
|
__desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) {
|
|
minmax_result<_ValueT> __result = {__r[0], __r[0]};
|
|
for (auto __e : __r) {
|
|
if (__e < __result.min)
|
|
__result.min = __e;
|
|
if (__result.max < __e)
|
|
__result.max = __e;
|
|
}
|
|
return __result;
|
|
} else if constexpr (forward_range<_Range>) {
|
|
// Special-case the one element case. Avoid repeatedly initializing objects from the result of an iterator
|
|
// dereference when doing so might not be idempotent. The `if constexpr` avoids the extra branch in cases where
|
|
// it's not needed.
|
|
if constexpr (!same_as<remove_cvref_t<range_reference_t<_Range>>, _ValueT> ||
|
|
is_rvalue_reference_v<range_reference_t<_Range>>) {
|
|
if (ranges::next(__first) == __last) {
|
|
// During initialization, members are allowed to refer to already initialized members
|
|
// (see http://eel.is/c++draft/dcl.init.aggr#6)
|
|
minmax_result<_ValueT> __result = {*__first, __result.min};
|
|
return __result;
|
|
}
|
|
}
|
|
auto __result = std::__minmax_element_impl(__first, __last, __comp, __proj);
|
|
return {*__result.first, *__result.second};
|
|
} else {
|
|
// input_iterators can't be copied, so the implementation for input_iterators has to store
|
|
// the values instead of a pointer to the correct values
|
|
auto __less = [&](auto&& __a, auto&& __b) -> bool {
|
|
return std::invoke(__comp,
|
|
std::invoke(__proj, std::forward<decltype(__a)>(__a)),
|
|
std::invoke(__proj, std::forward<decltype(__b)>(__b)));
|
|
};
|
|
|
|
// During initialization, members are allowed to refer to already initialized members
|
|
// (see http://eel.is/c++draft/dcl.init.aggr#6)
|
|
ranges::minmax_result<_ValueT> __result = {*__first, __result.min};
|
|
if (__first == __last || ++__first == __last)
|
|
return __result;
|
|
|
|
if (__less(*__first, __result.min))
|
|
__result.min = *__first;
|
|
else
|
|
__result.max = *__first;
|
|
|
|
while (++__first != __last) {
|
|
_ValueT __i = *__first;
|
|
if (++__first == __last) {
|
|
if (__less(__i, __result.min))
|
|
__result.min = __i;
|
|
else if (!__less(__i, __result.max))
|
|
__result.max = __i;
|
|
return __result;
|
|
}
|
|
|
|
if (__less(*__first, __i)) {
|
|
if (__less(*__first, __result.min))
|
|
__result.min = *__first;
|
|
if (!__less(__i, __result.max))
|
|
__result.max = std::move(__i);
|
|
} else {
|
|
if (__less(__i, __result.min))
|
|
__result.min = std::move(__i);
|
|
if (!__less(*__first, __result.max))
|
|
__result.max = *__first;
|
|
}
|
|
}
|
|
return __result;
|
|
}
|
|
}
|
|
};
|
|
|
|
inline namespace __cpo {
|
|
inline constexpr auto minmax = __minmax{};
|
|
} // namespace __cpo
|
|
} // namespace ranges
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
_LIBCPP_POP_MACROS
|
|
|
|
#endif // _LIBCPP_STD_VER >= 20
|
|
|
|
#endif // _LIBCPP___ALGORITHM_RANGES_MINMAX_H
|