mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 06:34:36 +00:00
pdqsort: Fixed overflow and simplified block partition logic. (2dc2bbd)
This commit is contained in:
parent
3b9e7a1838
commit
00e24e2b8c
@ -171,18 +171,18 @@ namespace pdqsort_detail {
|
|||||||
template<class Iter>
|
template<class Iter>
|
||||||
tracy_force_inline void swap_offsets(Iter first, Iter last,
|
tracy_force_inline void swap_offsets(Iter first, Iter last,
|
||||||
unsigned char* offsets_l, unsigned char* offsets_r,
|
unsigned char* offsets_l, unsigned char* offsets_r,
|
||||||
int num, bool use_swaps) {
|
size_t num, bool use_swaps) {
|
||||||
typedef typename std::iterator_traits<Iter>::value_type T;
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
if (use_swaps) {
|
if (use_swaps) {
|
||||||
// This case is needed for the descending distribution, where we need
|
// This case is needed for the descending distribution, where we need
|
||||||
// to have proper swapping for pdqsort to remain O(n).
|
// to have proper swapping for pdqsort to remain O(n).
|
||||||
for (int i = 0; i < num; ++i) {
|
for (size_t i = 0; i < num; ++i) {
|
||||||
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
|
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
|
||||||
}
|
}
|
||||||
} else if (num > 0) {
|
} else if (num > 0) {
|
||||||
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
|
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
|
||||||
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
|
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
|
||||||
for (int i = 1; i < num; ++i) {
|
for (size_t i = 1; i < num; ++i) {
|
||||||
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
|
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
|
||||||
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
|
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
|
||||||
}
|
}
|
||||||
@ -219,108 +219,94 @@ namespace pdqsort_detail {
|
|||||||
if (!already_partitioned) {
|
if (!already_partitioned) {
|
||||||
std::iter_swap(first, last);
|
std::iter_swap(first, last);
|
||||||
++first;
|
++first;
|
||||||
}
|
|
||||||
|
|
||||||
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
|
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
|
||||||
// Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss.
|
// Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss, but
|
||||||
unsigned char offsets_l_storage[block_size + cacheline_size];
|
// heavily micro-optimized.
|
||||||
unsigned char offsets_r_storage[block_size + cacheline_size];
|
unsigned char offsets_l_storage[block_size + cacheline_size];
|
||||||
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
|
unsigned char offsets_r_storage[block_size + cacheline_size];
|
||||||
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
|
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
|
||||||
int num_l, num_r, start_l, start_r;
|
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
|
||||||
num_l = num_r = start_l = start_r = 0;
|
|
||||||
|
Iter offsets_l_base = first;
|
||||||
while (last - first > 2 * block_size) {
|
Iter offsets_r_base = last;
|
||||||
// Fill up offset blocks with elements that are on the wrong side.
|
size_t num_l, num_r, start_l, start_r;
|
||||||
if (num_l == 0) {
|
num_l = num_r = start_l = start_r = 0;
|
||||||
start_l = 0;
|
|
||||||
Iter it = first;
|
while (first < last) {
|
||||||
for (unsigned char i = 0; i < block_size;) {
|
// Fill up offset blocks with elements that are on the wrong side.
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
// First we determine how many elements are considered for each offset block.
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
size_t num_unknown = last - first;
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
size_t left_split = num_l == 0 ? (num_r == 0 ? num_unknown / 2 : num_unknown) : 0;
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
size_t right_split = num_r == 0 ? (num_unknown - left_split) : 0;
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
// Fill the offset blocks.
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
if (left_split >= block_size) {
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
for (size_t i = 0; i < block_size;) {
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < left_split;) {
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (num_r == 0) {
|
if (right_split >= block_size) {
|
||||||
start_r = 0;
|
for (size_t i = 0; i < block_size;) {
|
||||||
Iter it = last;
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
for (unsigned char i = 0; i < block_size;) {
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
}
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
} else {
|
||||||
|
for (size_t i = 0; i < right_split;) {
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap elements and update block sizes and first/last boundaries.
|
||||||
|
size_t num = std::min(num_l, num_r);
|
||||||
|
swap_offsets(offsets_l_base, offsets_r_base,
|
||||||
|
offsets_l + start_l, offsets_r + start_r,
|
||||||
|
num, num_l == num_r);
|
||||||
|
num_l -= num; num_r -= num;
|
||||||
|
start_l += num; start_r += num;
|
||||||
|
|
||||||
|
if (num_l == 0) {
|
||||||
|
start_l = 0;
|
||||||
|
offsets_l_base = first;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_r == 0) {
|
||||||
|
start_r = 0;
|
||||||
|
offsets_r_base = last;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Swap elements and update block sizes and first/last boundaries.
|
// We have now fully identified [first, last)'s proper position. Swap the last elements.
|
||||||
int num = std::min(num_l, num_r);
|
if (num_l) {
|
||||||
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r,
|
offsets_l += start_l;
|
||||||
num, num_l == num_r);
|
while (num_l--) std::iter_swap(offsets_l_base + offsets_l[num_l], --last);
|
||||||
num_l -= num; num_r -= num;
|
first = last;
|
||||||
start_l += num; start_r += num;
|
|
||||||
if (num_l == 0) first += block_size;
|
|
||||||
if (num_r == 0) last -= block_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
int l_size = 0, r_size = 0;
|
|
||||||
int unknown_left = (int)(last - first) - ((num_r || num_l) ? block_size : 0);
|
|
||||||
if (num_r) {
|
|
||||||
// Handle leftover block by assigning the unknown elements to the other block.
|
|
||||||
l_size = unknown_left;
|
|
||||||
r_size = block_size;
|
|
||||||
} else if (num_l) {
|
|
||||||
l_size = block_size;
|
|
||||||
r_size = unknown_left;
|
|
||||||
} else {
|
|
||||||
// No leftover block, split the unknown elements in two blocks.
|
|
||||||
l_size = unknown_left/2;
|
|
||||||
r_size = unknown_left - l_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill offset buffers if needed.
|
|
||||||
if (unknown_left && !num_l) {
|
|
||||||
start_l = 0;
|
|
||||||
Iter it = first;
|
|
||||||
for (unsigned char i = 0; i < l_size;) {
|
|
||||||
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
|
||||||
}
|
}
|
||||||
}
|
if (num_r) {
|
||||||
if (unknown_left && !num_r) {
|
offsets_r += start_r;
|
||||||
start_r = 0;
|
while (num_r--) std::iter_swap(offsets_r_base - offsets_r[num_r], first), ++first;
|
||||||
Iter it = last;
|
last = first;
|
||||||
for (unsigned char i = 0; i < r_size;) {
|
|
||||||
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int num = std::min(num_l, num_r);
|
|
||||||
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, num_l == num_r);
|
|
||||||
num_l -= num; num_r -= num;
|
|
||||||
start_l += num; start_r += num;
|
|
||||||
if (num_l == 0) first += l_size;
|
|
||||||
if (num_r == 0) last -= r_size;
|
|
||||||
|
|
||||||
// We have now fully identified [first, last)'s proper position. Swap the last elements.
|
|
||||||
if (num_l) {
|
|
||||||
offsets_l += start_l;
|
|
||||||
while (num_l--) std::iter_swap(first + offsets_l[num_l], --last);
|
|
||||||
first = last;
|
|
||||||
}
|
|
||||||
if (num_r) {
|
|
||||||
offsets_r += start_r;
|
|
||||||
while (num_r--) std::iter_swap(last - offsets_r[num_r], first), ++first;
|
|
||||||
last = first;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put the pivot in the right place.
|
// Put the pivot in the right place.
|
||||||
Iter pivot_pos = first - 1;
|
Iter pivot_pos = first - 1;
|
||||||
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||||
@ -329,6 +315,8 @@ namespace pdqsort_detail {
|
|||||||
return std::make_pair(pivot_pos, already_partitioned);
|
return std::make_pair(pivot_pos, already_partitioned);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
||||||
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
||||||
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
||||||
|
Loading…
Reference in New Issue
Block a user