CoCalc -- parallel.h

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/manifold/src/parallel.h
²¹¹⁰² views
1
// Copyright 2022 The Manifold Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
// Simple implementation of selected functions in PSTL.
16
// Iterators must be RandomAccessIterator.
17

18
#pragma once
19

20
#include "iters.h"
21
#if (MANIFOLD_PAR == 1)
22
#include <tbb/combinable.h>
23
#include <tbb/parallel_for.h>
24
#include <tbb/parallel_invoke.h>
25
#include <tbb/parallel_reduce.h>
26
#include <tbb/parallel_scan.h>
27
#endif
28
#include <algorithm>
29
#include <numeric>
30

31
namespace manifold {
32

33
enum class ExecutionPolicy {
34
  Par,
35
  Seq,
36
};
37

38
constexpr size_t kSeqThreshold = 1e4;
39
// ExecutionPolicy:
40
// - Sequential for small workload,
41
// - Parallel (CPU) for medium workload,
42
inline constexpr ExecutionPolicy autoPolicy(size_t size,
43
                                            size_t threshold = kSeqThreshold) {
44
  if (size <= threshold) {
45
    return ExecutionPolicy::Seq;
46
  }
47
  return ExecutionPolicy::Par;
48
}
49

50
template <typename Iter,
51
          typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>
52
inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,
53
                                            size_t threshold = kSeqThreshold) {
54
  if (static_cast<size_t>(std::distance(first, last)) <= threshold) {
55
    return ExecutionPolicy::Seq;
56
  }
57
  return ExecutionPolicy::Par;
58
}
59

60
template <typename InputIter, typename OutputIter>
61
void copy(ExecutionPolicy policy, InputIter first, InputIter last,
62
          OutputIter d_first);
63
template <typename InputIter, typename OutputIter>
64
void copy(InputIter first, InputIter last, OutputIter d_first);
65

66
#if (MANIFOLD_PAR == 1)
67
namespace details {
68
using manifold::kSeqThreshold;
69
// implementation from
70
// https://duvanenko.tech.blog/2018/01/14/parallel-merge/
71
// https://github.com/DragonSpit/ParallelAlgorithms
72
// note that the ranges are now [p, r) to fit our convention.
73
template <typename SrcIter, typename DestIter, typename Comp>
74
void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2,
75
              size_t r2, size_t p3, Comp comp) {
76
  size_t length1 = r1 - p1;
77
  size_t length2 = r2 - p2;
78
  if (length1 < length2) {
79
    std::swap(p1, p2);
80
    std::swap(r1, r2);
81
    std::swap(length1, length2);
82
  }
83
  if (length1 == 0) return;
84
  if (length1 + length2 <= kSeqThreshold) {
85
    std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp);
86
  } else {
87
    size_t q1 = p1 + length1 / 2;
88
    size_t q2 =
89
        std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp));
90
    size_t q3 = p3 + (q1 - p1) + (q2 - p2);
91
    dest[q3] = src[q1];
92
    tbb::parallel_invoke(
93
        [=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); },
94
        [=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); });
95
  }
96
}
97

98
template <typename SrcIter, typename DestIter, typename Comp>
99
void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end,
100
                  Comp comp) {
101
  size_t numElements = end - begin;
102
  if (numElements <= kSeqThreshold) {
103
    std::copy(src + begin, src + end, dest + begin);
104
    std::stable_sort(dest + begin, dest + end, comp);
105
  } else {
106
    size_t middle = begin + numElements / 2;
107
    tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); },
108
                         [=] { mergeSortRec(dest, src, middle, end, comp); });
109
    mergeRec(src, dest, begin, middle, middle, end, begin, comp);
110
  }
111
}
112

113
template <typename T, typename InputIter, typename OutputIter, typename BinOp>
114
struct ScanBody {
115
  T sum;
116
  T identity;
117
  BinOp& f;
118
  InputIter input;
119
  OutputIter output;
120

121
  ScanBody(T sum, T identity, BinOp& f, InputIter input, OutputIter output)
122
      : sum(sum), identity(identity), f(f), input(input), output(output) {}
123
  ScanBody(ScanBody& b, tbb::split)
124
      : sum(b.identity),
125
        identity(b.identity),
126
        f(b.f),
127
        input(b.input),
128
        output(b.output) {}
129
  template <typename Tag>
130
  void operator()(const tbb::blocked_range<size_t>& r, Tag) {
131
    T temp = sum;
132
    for (size_t i = r.begin(); i < r.end(); ++i) {
133
      T inputTmp = input[i];
134
      if (Tag::is_final_scan()) output[i] = temp;
135
      temp = f(temp, inputTmp);
136
    }
137
    sum = temp;
138
  }
139
  T get_sum() const { return sum; }
140
  void reverse_join(ScanBody& a) { sum = f(a.sum, sum); }
141
  void assign(ScanBody& b) { sum = b.sum; }
142
};
143

144
template <typename InputIter, typename OutputIter, typename P>
145
struct CopyIfScanBody {
146
  size_t sum;
147
  P& pred;
148
  InputIter input;
149
  OutputIter output;
150

151
  CopyIfScanBody(P& pred, InputIter input, OutputIter output)
152
      : sum(0), pred(pred), input(input), output(output) {}
153
  CopyIfScanBody(CopyIfScanBody& b, tbb::split)
154
      : sum(0), pred(b.pred), input(b.input), output(b.output) {}
155
  template <typename Tag>
156
  void operator()(const tbb::blocked_range<size_t>& r, Tag) {
157
    size_t temp = sum;
158
    for (size_t i = r.begin(); i < r.end(); ++i) {
159
      if (pred(i)) {
160
        temp += 1;
161
        if (Tag::is_final_scan()) output[temp - 1] = input[i];
162
      }
163
    }
164
    sum = temp;
165
  }
166
  size_t get_sum() const { return sum; }
167
  void reverse_join(CopyIfScanBody& a) { sum = a.sum + sum; }
168
  void assign(CopyIfScanBody& b) { sum = b.sum; }
169
};
170

171
template <typename N, const int K>
172
struct Hist {
173
  using SizeType = N;
174
  static constexpr int k = K;
175
  N hist[k][256] = {{0}};
176
  void merge(const Hist<N, K>& other) {
177
    for (int i = 0; i < k; ++i)
178
      for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j];
179
  }
180
  void prefixSum(N total, bool* canSkip) {
181
    for (int i = 0; i < k; ++i) {
182
      size_t count = 0;
183
      for (int j = 0; j < 256; ++j) {
184
        N tmp = hist[i][j];
185
        hist[i][j] = count;
186
        count += tmp;
187
        if (tmp == total) canSkip[i] = true;
188
      }
189
    }
190
  }
191
};
192

193
template <typename T, typename H>
194
void histogram(T* ptr, typename H::SizeType n, H& hist) {
195
  auto worker = [](T* ptr, typename H::SizeType n, H& hist) {
196
    for (typename H::SizeType i = 0; i < n; ++i)
197
      for (int k = 0; k < hist.k; ++k)
198
        ++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF];
199
  };
200
  if (n < kSeqThreshold) {
201
    worker(ptr, n, hist);
202
  } else {
203
    tbb::combinable<H> store;
204
    tbb::parallel_for(
205
        tbb::blocked_range<typename H::SizeType>(0, n, kSeqThreshold),
206
        [&worker, &store, ptr](const auto& r) {
207
          worker(ptr + r.begin(), r.end() - r.begin(), store.local());
208
        });
209
    store.combine_each([&hist](const H& h) { hist.merge(h); });
210
  }
211
}
212

213
template <typename T, typename H>
214
void shuffle(T* src, T* target, typename H::SizeType n, H& hist, int k) {
215
  for (typename H::SizeType i = 0; i < n; ++i)
216
    target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i];
217
}
218

219
template <typename T, typename SizeType>
220
bool LSB_radix_sort(T* input, T* tmp, SizeType n) {
221
  Hist<SizeType, sizeof(T) / sizeof(char)> hist;
222
  if (std::is_sorted(input, input + n)) return false;
223
  histogram(input, n, hist);
224
  bool canSkip[hist.k] = {0};
225
  hist.prefixSum(n, canSkip);
226
  T *a = input, *b = tmp;
227
  for (int k = 0; k < hist.k; ++k) {
228
    if (!canSkip[k]) {
229
      shuffle(a, b, n, hist, k);
230
      std::swap(a, b);
231
    }
232
  }
233
  return a == tmp;
234
}
235

236
// LSB radix sort with merge
237
template <typename T, typename SizeType>
238
struct SortedRange {
239
  T *input, *tmp;
240
  SizeType offset = 0, length = 0;
241
  bool inTmp = false;
242

243
  SortedRange(T* input, T* tmp, SizeType offset = 0, SizeType length = 0)
244
      : input(input), tmp(tmp), offset(offset), length(length) {}
245
  SortedRange(SortedRange<T, SizeType>& r, tbb::split)
246
      : input(r.input), tmp(r.tmp) {}
247
  // FIXME: no idea why thread sanitizer reports data race here
248
#if defined(__has_feature)
249
#if __has_feature(thread_sanitizer)
250
  __attribute__((no_sanitize("thread")))
251
#endif
252
#endif
253
  void
254
  operator()(const tbb::blocked_range<SizeType>& range) {
255
    SortedRange<T, SizeType> rhs(input, tmp, range.begin(),
256
                                 range.end() - range.begin());
257
    rhs.inTmp =
258
        LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length);
259
    if (length == 0)
260
      *this = rhs;
261
    else
262
      join(rhs);
263
  }
264
  bool swapBuffer() const {
265
    T *src = input, *target = tmp;
266
    if (inTmp) std::swap(src, target);
267
    copy(src + offset, src + offset + length, target + offset);
268
    return !inTmp;
269
  }
270
  void join(const SortedRange<T, SizeType>& rhs) {
271
    if (inTmp != rhs.inTmp) {
272
      if (length < rhs.length)
273
        inTmp = swapBuffer();
274
      else
275
        rhs.swapBuffer();
276
    }
277
    T *src = input, *target = tmp;
278
    if (inTmp) std::swap(src, target);
279
    if (src[offset + length - 1] > src[rhs.offset]) {
280
      mergeRec(src, target, offset, offset + length, rhs.offset,
281
               rhs.offset + rhs.length, offset, std::less<T>());
282
      inTmp = !inTmp;
283
    }
284
    length += rhs.length;
285
  }
286
};
287

288
template <typename T, typename SizeTy>
289
void radix_sort(T* input, SizeTy n) {
290
  T* aux = new T[n];
291
  SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4,
292
                              static_cast<SizeTy>(kSeqThreshold / sizeof(T)));
293
  SortedRange<T, SizeTy> result(input, aux);
294
  tbb::parallel_reduce(tbb::blocked_range<SizeTy>(0, n, blockSize), result);
295
  if (result.inTmp) copy(aux, aux + n, input);
296
  delete[] aux;
297
}
298

299
template <typename Iterator,
300
          typename T = typename std::iterator_traits<Iterator>::value_type,
301
          typename Comp = decltype(std::less<T>())>
302
void mergeSort(ExecutionPolicy policy, Iterator first, Iterator last,
303
               Comp comp) {
304
#if (MANIFOLD_PAR == 1)
305
  if (policy == ExecutionPolicy::Par) {
306
    // apparently this prioritizes threads inside here?
307
    tbb::this_task_arena::isolate([&] {
308
      size_t length = std::distance(first, last);
309
      T* tmp = new T[length];
310
      copy(policy, first, last, tmp);
311
      details::mergeSortRec(tmp, first, 0, length, comp);
312
      delete[] tmp;
313
    });
314
    return;
315
  }
316
#endif
317
  std::stable_sort(first, last, comp);
318
}
319

320
// stable_sort using merge sort.
321
//
322
// For simpler implementation, we do not support types that are not trivially
323
// destructable.
324
template <typename Iterator,
325
          typename T = typename std::iterator_traits<Iterator>::value_type,
326
          typename Dummy = void>
327
struct SortFunctor {
328
  void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
329
    static_assert(
330
        std::is_convertible_v<
331
            typename std::iterator_traits<Iterator>::iterator_category,
332
            std::random_access_iterator_tag>,
333
        "You can only parallelize RandomAccessIterator.");
334
    static_assert(std::is_trivially_destructible_v<T>,
335
                  "Our simple implementation does not support types that are "
336
                  "not trivially destructable.");
337
    return mergeSort(policy, first, last, std::less<T>());
338
  }
339
};
340

341
// stable_sort specialized with radix sort for integral types.
342
// Typically faster than merge sort.
343
template <typename Iterator, typename T>
344
struct SortFunctor<
345
    Iterator, T,
346
    std::enable_if_t<
347
        std::is_integral_v<T> &&
348
        std::is_pointer_v<typename std::iterator_traits<Iterator>::pointer>>> {
349
  void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
350
    static_assert(
351
        std::is_convertible_v<
352
            typename std::iterator_traits<Iterator>::iterator_category,
353
            std::random_access_iterator_tag>,
354
        "You can only parallelize RandomAccessIterator.");
355
    static_assert(std::is_trivially_destructible_v<T>,
356
                  "Our simple implementation does not support types that are "
357
                  "not trivially destructable.");
358
#if (MANIFOLD_PAR == 1)
359
    if (policy == ExecutionPolicy::Par) {
360
      radix_sort(&*first, static_cast<size_t>(std::distance(first, last)));
361
      return;
362
    }
363
#endif
364
    stable_sort(policy, first, last, std::less<T>());
365
  }
366
};
367

368
}  // namespace details
369

370
#endif
371

372
// Applies the function `f` to each element in the range `[first, last)`
373
template <typename Iter, typename F>
374
void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) {
375
  static_assert(std::is_convertible_v<
376
                    typename std::iterator_traits<Iter>::iterator_category,
377
                    std::random_access_iterator_tag>,
378
                "You can only parallelize RandomAccessIterator.");
379
  (void)policy;
380
#if (MANIFOLD_PAR == 1)
381
  if (policy == ExecutionPolicy::Par) {
382
    tbb::this_task_arena::isolate([&]() {
383
      tbb::parallel_for(tbb::blocked_range<Iter>(first, last),
384
                        [&f](const tbb::blocked_range<Iter>& range) {
385
                          for (Iter i = range.begin(); i != range.end(); i++)
386
                            f(*i);
387
                        });
388
    });
389
    return;
390
  }
391
#endif
392
  std::for_each(first, last, f);
393
}
394

395
// Applies the function `f` to each element in the range `[first, last)`
396
template <typename Iter, typename F>
397
void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) {
398
  static_assert(std::is_convertible_v<
399
                    typename std::iterator_traits<Iter>::iterator_category,
400
                    std::random_access_iterator_tag>,
401
                "You can only parallelize RandomAccessIterator.");
402
  for_each(policy, first, first + n, f);
403
}
404

405
// Reduce the range `[first, last)` using a binary operation `f` with an initial
406
// value `init`.
407
//
408
// The binary operation should be commutative and associative. Otherwise, the
409
// result is non-deterministic.
410
template <typename InputIter, typename BinaryOp,
411
          typename T = typename std::iterator_traits<InputIter>::value_type>
412
T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init,
413
         BinaryOp f) {
414
  static_assert(std::is_convertible_v<
415
                    typename std::iterator_traits<InputIter>::iterator_category,
416
                    std::random_access_iterator_tag>,
417
                "You can only parallelize RandomAccessIterator.");
418
  (void)policy;
419
#if (MANIFOLD_PAR == 1)
420
  if (policy == ExecutionPolicy::Par) {
421
    // should we use deterministic reduce here?
422
    return tbb::this_task_arena::isolate([&]() {
423
      return tbb::parallel_reduce(
424
          tbb::blocked_range<InputIter>(first, last, details::kSeqThreshold),
425
          init,
426
          [&f](const tbb::blocked_range<InputIter>& range, T value) {
427
            return std::reduce(range.begin(), range.end(), value, f);
428
          },
429
          f);
430
    });
431
  }
432
#endif
433
  return std::reduce(first, last, init, f);
434
}
435

436
// Reduce the range `[first, last)` using a binary operation `f` with an initial
437
// value `init`.
438
//
439
// The binary operation should be commutative and associative. Otherwise, the
440
// result is non-deterministic.
441
template <typename InputIter, typename BinaryOp,
442
          typename T = typename std::iterator_traits<InputIter>::value_type>
443
T reduce(InputIter first, InputIter last, T init, BinaryOp f) {
444
  return reduce(autoPolicy(first, last, 1e5), first, last, init, f);
445
}
446

447
// Transform and reduce the range `[first, last)` by first applying a unary
448
// function `g`, and then combining the results using a binary operation `f`
449
// with an initial value `init`.
450
//
451
// The binary operation should be commutative and associative. Otherwise, the
452
// result is non-deterministic.
453
template <typename InputIter, typename BinaryOp, typename UnaryOp,
454
          typename T = std::invoke_result_t<
455
              UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
456
T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last,
457
                   T init, BinaryOp f, UnaryOp g) {
458
  return reduce(policy, TransformIterator(first, g), TransformIterator(last, g),
459
                init, f);
460
}
461

462
// Transform and reduce the range `[first, last)` by first applying a unary
463
// function `g`, and then combining the results using a binary operation `f`
464
// with an initial value `init`.
465
//
466
// The binary operation should be commutative and associative. Otherwise, the
467
// result is non-deterministic.
468
template <typename InputIter, typename BinaryOp, typename UnaryOp,
469
          typename T = std::invoke_result_t<
470
              UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
471
T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f,
472
                   UnaryOp g) {
473
  return manifold::reduce(TransformIterator(first, g),
474
                          TransformIterator(last, g), init, f);
475
}
476

477
// Compute the inclusive prefix sum for the range `[first, last)`
478
// using the summation operator, and store the result in the range
479
// starting from `d_first`.
480
//
481
// The input range `[first, last)` and
482
// the output range `[d_first, d_first + last - first)`
483
// must be equal or non-overlapping.
484
template <typename InputIter, typename OutputIter,
485
          typename T = typename std::iterator_traits<InputIter>::value_type>
486
void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
487
                    OutputIter d_first) {
488
  static_assert(std::is_convertible_v<
489
                    typename std::iterator_traits<InputIter>::iterator_category,
490
                    std::random_access_iterator_tag>,
491
                "You can only parallelize RandomAccessIterator.");
492
  static_assert(
493
      std::is_convertible_v<
494
          typename std::iterator_traits<OutputIter>::iterator_category,
495
          std::random_access_iterator_tag>,
496
      "You can only parallelize RandomAccessIterator.");
497
  (void)policy;
498
#if (MANIFOLD_PAR == 1)
499
  if (policy == ExecutionPolicy::Par) {
500
    tbb::this_task_arena::isolate([&]() {
501
      tbb::parallel_scan(
502
          tbb::blocked_range<size_t>(0, std::distance(first, last)),
503
          static_cast<T>(0),
504
          [&](const tbb::blocked_range<size_t>& range, T sum,
505
              bool is_final_scan) {
506
            T temp = sum;
507
            for (size_t i = range.begin(); i < range.end(); ++i) {
508
              temp = temp + first[i];
509
              if (is_final_scan) d_first[i] = temp;
510
            }
511
            return temp;
512
          },
513
          std::plus<T>());
514
    });
515
    return;
516
  }
517
#endif
518
  std::inclusive_scan(first, last, d_first);
519
}
520

521
// Compute the inclusive prefix sum for the range `[first, last)` using the
522
// summation operator, and store the result in the range
523
// starting from `d_first`.
524
//
525
// The input range `[first, last)` and
526
// the output range `[d_first, d_first + last - first)`
527
// must be equal or non-overlapping.
528
template <typename InputIter, typename OutputIter,
529
          typename T = typename std::iterator_traits<InputIter>::value_type>
530
void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) {
531
  return inclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first);
532
}
533

534
// Compute the inclusive prefix sum for the range `[first, last)` using the
535
// binary operator `f`, with initial value `init` and
536
// identity element `identity`, and store the result in the range
537
// starting from `d_first`.
538
//
539
// This is different from `exclusive_scan` in the sequential algorithm by
540
// requiring an identity element. This is needed so that each block can be
541
// scanned in parallel and combined later.
542
//
543
// The input range `[first, last)` and
544
// the output range `[d_first, d_first + last - first)`
545
// must be equal or non-overlapping.
546
template <typename InputIter, typename OutputIter,
547
          typename BinOp = decltype(std::plus<typename std::iterator_traits<
548
                                        InputIter>::value_type>()),
549
          typename T = typename std::iterator_traits<InputIter>::value_type>
550
void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
551
                    OutputIter d_first, T init = static_cast<T>(0),
552
                    BinOp f = std::plus<T>(), T identity = static_cast<T>(0)) {
553
  static_assert(std::is_convertible_v<
554
                    typename std::iterator_traits<InputIter>::iterator_category,
555
                    std::random_access_iterator_tag>,
556
                "You can only parallelize RandomAccessIterator.");
557
  static_assert(
558
      std::is_convertible_v<
559
          typename std::iterator_traits<OutputIter>::iterator_category,
560
          std::random_access_iterator_tag>,
561
      "You can only parallelize RandomAccessIterator.");
562
  (void)policy;
563
  (void)identity;
564
#if (MANIFOLD_PAR == 1)
565
  if (policy == ExecutionPolicy::Par) {
566
    details::ScanBody<T, InputIter, OutputIter, BinOp> body(init, identity, f,
567
                                                            first, d_first);
568
    tbb::this_task_arena::isolate([&]() {
569
      tbb::parallel_scan(
570
          tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
571
    });
572
    return;
573
  }
574
#endif
575
  std::exclusive_scan(first, last, d_first, init, f);
576
}
577

578
// Compute the inclusive prefix sum for the range `[first, last)` using the
579
// binary operator `f`, with initial value `init` and
580
// identity element `identity`, and store the result in the range
581
// starting from `d_first`.
582
//
583
// This is different from `exclusive_scan` in the sequential algorithm by
584
// requiring an identity element. This is needed so that each block can be
585
// scanned in parallel and combined later.
586
//
587
// The input range `[first, last)` and
588
// the output range `[d_first, d_first + last - first)`
589
// must be equal or non-overlapping.
590
template <typename InputIter, typename OutputIter,
591
          typename BinOp = decltype(std::plus<typename std::iterator_traits<
592
                                        InputIter>::value_type>()),
593
          typename T = typename std::iterator_traits<InputIter>::value_type>
594
void exclusive_scan(InputIter first, InputIter last, OutputIter d_first,
595
                    T init = static_cast<T>(0), BinOp f = std::plus<T>(),
596
                    T identity = static_cast<T>(0)) {
597
  exclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first, init, f,
598
                 identity);
599
}
600

601
// Apply function `f` on the input range `[first, last)` and store the result in
602
// the range starting from `d_first`.
603
//
604
// The input range `[first, last)` and
605
// the output range `[d_first, d_first + last - first)`
606
// must be equal or non-overlapping.
607
template <typename InputIter, typename OutputIter, typename F>
608
void transform(ExecutionPolicy policy, InputIter first, InputIter last,
609
               OutputIter d_first, F f) {
610
  static_assert(std::is_convertible_v<
611
                    typename std::iterator_traits<InputIter>::iterator_category,
612
                    std::random_access_iterator_tag>,
613
                "You can only parallelize RandomAccessIterator.");
614
  static_assert(
615
      std::is_convertible_v<
616
          typename std::iterator_traits<OutputIter>::iterator_category,
617
          std::random_access_iterator_tag>,
618
      "You can only parallelize RandomAccessIterator.");
619
  (void)policy;
620
#if (MANIFOLD_PAR == 1)
621
  if (policy == ExecutionPolicy::Par) {
622
    tbb::this_task_arena::isolate([&]() {
623
      tbb::parallel_for(tbb::blocked_range<size_t>(
624
                            0, static_cast<size_t>(std::distance(first, last))),
625
                        [&](const tbb::blocked_range<size_t>& range) {
626
                          std::transform(first + range.begin(),
627
                                         first + range.end(),
628
                                         d_first + range.begin(), f);
629
                        });
630
    });
631
    return;
632
  }
633
#endif
634
  std::transform(first, last, d_first, f);
635
}
636

637
// Apply function `f` on the input range `[first, last)` and store the result in
638
// the range starting from `d_first`.
639
//
640
// The input range `[first, last)` and
641
// the output range `[d_first, d_first + last - first)`
642
// must be equal or non-overlapping.
643
template <typename InputIter, typename OutputIter, typename F>
644
void transform(InputIter first, InputIter last, OutputIter d_first, F f) {
645
  transform(autoPolicy(first, last, 1e5), first, last, d_first, f);
646
}
647

648
// Copy the input range `[first, last)` to the output range
649
// starting from `d_first`.
650
//
651
// The input range `[first, last)` and
652
// the output range `[d_first, d_first + last - first)`
653
// must not overlap.
654
template <typename InputIter, typename OutputIter>
655
void copy(ExecutionPolicy policy, InputIter first, InputIter last,
656
          OutputIter d_first) {
657
  static_assert(std::is_convertible_v<
658
                    typename std::iterator_traits<InputIter>::iterator_category,
659
                    std::random_access_iterator_tag>,
660
                "You can only parallelize RandomAccessIterator.");
661
  static_assert(
662
      std::is_convertible_v<
663
          typename std::iterator_traits<OutputIter>::iterator_category,
664
          std::random_access_iterator_tag>,
665
      "You can only parallelize RandomAccessIterator.");
666
  (void)policy;
667
#if (MANIFOLD_PAR == 1)
668
  if (policy == ExecutionPolicy::Par) {
669
    tbb::this_task_arena::isolate([&]() {
670
      tbb::parallel_for(tbb::blocked_range<size_t>(
671
                            0, static_cast<size_t>(std::distance(first, last)),
672
                            details::kSeqThreshold),
673
                        [&](const tbb::blocked_range<size_t>& range) {
674
                          std::copy(first + range.begin(), first + range.end(),
675
                                    d_first + range.begin());
676
                        });
677
    });
678
    return;
679
  }
680
#endif
681
  std::copy(first, last, d_first);
682
}
683

684
// Copy the input range `[first, last)` to the output range
685
// starting from `d_first`.
686
//
687
// The input range `[first, last)` and
688
// the output range `[d_first, d_first + last - first)`
689
// must not overlap.
690
template <typename InputIter, typename OutputIter>
691
void copy(InputIter first, InputIter last, OutputIter d_first) {
692
  copy(autoPolicy(first, last, 1e6), first, last, d_first);
693
}
694

695
// Copy the input range `[first, first + n)` to the output range
696
// starting from `d_first`.
697
//
698
// The input range `[first, first + n)` and
699
// the output range `[d_first, d_first + n)`
700
// must not overlap.
701
template <typename InputIter, typename OutputIter>
702
void copy_n(ExecutionPolicy policy, InputIter first, size_t n,
703
            OutputIter d_first) {
704
  copy(policy, first, first + n, d_first);
705
}
706

707
// Copy the input range `[first, first + n)` to the output range
708
// starting from `d_first`.
709
//
710
// The input range `[first, first + n)` and
711
// the output range `[d_first, d_first + n)`
712
// must not overlap.
713
template <typename InputIter, typename OutputIter>
714
void copy_n(InputIter first, size_t n, OutputIter d_first) {
715
  copy(autoPolicy(n, 1e6), first, first + n, d_first);
716
}
717

718
// Fill the range `[first, last)` with `value`.
719
template <typename OutputIter, typename T>
720
void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) {
721
  static_assert(
722
      std::is_convertible_v<
723
          typename std::iterator_traits<OutputIter>::iterator_category,
724
          std::random_access_iterator_tag>,
725
      "You can only parallelize RandomAccessIterator.");
726
  (void)policy;
727
#if (MANIFOLD_PAR == 1)
728
  if (policy == ExecutionPolicy::Par) {
729
    tbb::this_task_arena::isolate([&]() {
730
      tbb::parallel_for(tbb::blocked_range<OutputIter>(first, last),
731
                        [&](const tbb::blocked_range<OutputIter>& range) {
732
                          std::fill(range.begin(), range.end(), value);
733
                        });
734
    });
735
    return;
736
  }
737
#endif
738
  std::fill(first, last, value);
739
}
740

741
// Fill the range `[first, last)` with `value`.
742
template <typename OutputIter, typename T>
743
void fill(OutputIter first, OutputIter last, T value) {
744
  fill(autoPolicy(first, last, 5e5), first, last, value);
745
}
746

747
// Count the number of elements in the input range `[first, last)` satisfying
748
// predicate `pred`, i.e. `pred(x) == true`.
749
template <typename InputIter, typename P>
750
size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last,
751
                P pred) {
752
  (void)policy;
753
#if (MANIFOLD_PAR == 1)
754
  if (policy == ExecutionPolicy::Par) {
755
    return reduce(policy, TransformIterator(first, pred),
756
                  TransformIterator(last, pred), 0, std::plus<size_t>());
757
  }
758
#endif
759
  return std::count_if(first, last, pred);
760
}
761

762
// Count the number of elements in the input range `[first, last)` satisfying
763
// predicate `pred`, i.e. `pred(x) == true`.
764
template <typename InputIter, typename P>
765
size_t count_if(InputIter first, InputIter last, P pred) {
766
  return count_if(autoPolicy(first, last, 1e4), first, last, pred);
767
}
768

769
// Check if all elements in the input range `[first, last)` satisfy
770
// predicate `pred`, i.e. `pred(x) == true`.
771
template <typename InputIter, typename P>
772
bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) {
773
  static_assert(std::is_convertible_v<
774
                    typename std::iterator_traits<InputIter>::iterator_category,
775
                    std::random_access_iterator_tag>,
776
                "You can only parallelize RandomAccessIterator.");
777
  (void)policy;
778
#if (MANIFOLD_PAR == 1)
779
  if (policy == ExecutionPolicy::Par) {
780
    // should we use deterministic reduce here?
781
    return tbb::this_task_arena::isolate([&]() {
782
      return tbb::parallel_reduce(
783
          tbb::blocked_range<InputIter>(first, last), true,
784
          [&](const tbb::blocked_range<InputIter>& range, bool value) {
785
            if (!value) return false;
786
            for (InputIter i = range.begin(); i != range.end(); i++)
787
              if (!pred(*i)) return false;
788
            return true;
789
          },
790
          [](bool a, bool b) { return a && b; });
791
    });
792
  }
793
#endif
794
  return std::all_of(first, last, pred);
795
}
796

797
// Check if all elements in the input range `[first, last)` satisfy
798
// predicate `pred`, i.e. `pred(x) == true`.
799
template <typename InputIter, typename P>
800
bool all_of(InputIter first, InputIter last, P pred) {
801
  return all_of(autoPolicy(first, last, 1e5), first, last, pred);
802
}
803

804
// Copy values in the input range `[first, last)` to the output range
805
// starting from `d_first` that satisfies the predicate `pred`,
806
// i.e. `pred(x) == true`, and returns `d_first + n` where `n` is the number of
807
// times the predicate is evaluated to true.
808
//
809
// This function is stable, meaning that the relative order of elements in the
810
// output range remains unchanged.
811
//
812
// The input range `[first, last)` and
813
// the output range `[d_first, d_first + last - first)`
814
// must not overlap.
815
template <typename InputIter, typename OutputIter, typename P>
816
OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last,
817
                   OutputIter d_first, P pred) {
818
  static_assert(std::is_convertible_v<
819
                    typename std::iterator_traits<InputIter>::iterator_category,
820
                    std::random_access_iterator_tag>,
821
                "You can only parallelize RandomAccessIterator.");
822
  static_assert(
823
      std::is_convertible_v<
824
          typename std::iterator_traits<OutputIter>::iterator_category,
825
          std::random_access_iterator_tag>,
826
      "You can only parallelize RandomAccessIterator.");
827
  (void)policy;
828
#if (MANIFOLD_PAR == 1)
829
  if (policy == ExecutionPolicy::Par) {
830
    auto pred2 = [&](size_t i) { return pred(first[i]); };
831
    details::CopyIfScanBody body(pred2, first, d_first);
832
    tbb::this_task_arena::isolate([&]() {
833
      tbb::parallel_scan(
834
          tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
835
      return d_first + body.get_sum();
836
    });
837
  }
838
#endif
839
  return std::copy_if(first, last, d_first, pred);
840
}
841

842
// Copy values in the input range `[first, last)` to the output range
843
// starting from `d_first` that satisfies the predicate `pred`, i.e. `pred(x) ==
844
// true`, and returns `d_first + n` where `n` is the number of times the
845
// predicate is evaluated to true.
846
//
847
// This function is stable, meaning that the relative order of elements in the
848
// output range remains unchanged.
849
//
850
// The input range `[first, last)` and
851
// the output range `[d_first, d_first + last - first)`
852
// must not overlap.
853
template <typename InputIter, typename OutputIter, typename P>
854
OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first,
855
                   P pred) {
856
  return copy_if(autoPolicy(first, last, 1e5), first, last, d_first, pred);
857
}
858

859
// Remove values in the input range `[first, last)` that satisfies
860
// the predicate `pred`, i.e. `pred(x) == true`, and returns `first + n`
861
// where `n` is the number of times the predicate is evaluated to false.
862
//
863
// This function is stable, meaning that the relative order of elements that
864
// remained are unchanged.
865
//
866
// Only trivially destructable types are supported.
867
template <typename Iter, typename P,
868
          typename T = typename std::iterator_traits<Iter>::value_type>
869
Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) {
870
  static_assert(std::is_convertible_v<
871
                    typename std::iterator_traits<Iter>::iterator_category,
872
                    std::random_access_iterator_tag>,
873
                "You can only parallelize RandomAccessIterator.");
874
  static_assert(std::is_trivially_destructible_v<T>,
875
                "Our simple implementation does not support types that are "
876
                "not trivially destructable.");
877
  (void)policy;
878
#if (MANIFOLD_PAR == 1)
879
  if (policy == ExecutionPolicy::Par) {
880
    T* tmp = new T[std::distance(first, last)];
881
    auto back =
882
        copy_if(policy, first, last, tmp, [&](T v) { return !pred(v); });
883
    copy(policy, tmp, back, first);
884
    auto d = std::distance(tmp, back);
885
    delete[] tmp;
886
    return first + d;
887
  }
888
#endif
889
  return std::remove_if(first, last, pred);
890
}
891

892
// Remove values in the input range `[first, last)` that satisfies
893
// the predicate `pred`, i.e. `pred(x) == true`, and
894
// returns `first + n` where `n` is the number of times the predicate is
895
// evaluated to false.
896
//
897
// This function is stable, meaning that the relative order of elements that
898
// remained are unchanged.
899
//
900
// Only trivially destructable types are supported.
901
template <typename Iter, typename P,
902
          typename T = typename std::iterator_traits<Iter>::value_type>
903
Iter remove_if(Iter first, Iter last, P pred) {
904
  return remove_if(autoPolicy(first, last, 1e4), first, last, pred);
905
}
906

907
// Remove values in the input range `[first, last)` that are equal to `value`.
908
// Returns `first + n` where `n` is the number of values
909
// that are not equal to `value`.
910
//
911
// This function is stable, meaning that the relative order of elements that
912
// remained are unchanged.
913
//
914
// Only trivially destructable types are supported.
915
template <typename Iter,
916
          typename T = typename std::iterator_traits<Iter>::value_type>
917
Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) {
918
  static_assert(std::is_convertible_v<
919
                    typename std::iterator_traits<Iter>::iterator_category,
920
                    std::random_access_iterator_tag>,
921
                "You can only parallelize RandomAccessIterator.");
922
  static_assert(std::is_trivially_destructible_v<T>,
923
                "Our simple implementation does not support types that are "
924
                "not trivially destructable.");
925
  (void)policy;
926
#if (MANIFOLD_PAR == 1)
927
  if (policy == ExecutionPolicy::Par) {
928
    T* tmp = new T[std::distance(first, last)];
929
    auto back =
930
        copy_if(policy, first, last, tmp, [&](T v) { return v != value; });
931
    copy(policy, tmp, back, first);
932
    auto d = std::distance(tmp, back);
933
    delete[] tmp;
934
    return first + d;
935
  }
936
#endif
937
  return std::remove(first, last, value);
938
}
939

940
// Remove values in the input range `[first, last)` that are equal to `value`.
941
// Returns `first + n` where `n` is the number of values
942
// that are not equal to `value`.
943
//
944
// This function is stable, meaning that the relative order of elements that
945
// remained are unchanged.
946
//
947
// Only trivially destructable types are supported.
948
template <typename Iter,
949
          typename T = typename std::iterator_traits<Iter>::value_type>
950
Iter remove(Iter first, Iter last, T value) {
951
  return remove(autoPolicy(first, last, 1e4), first, last, value);
952
}
953

954
// For each group of consecutive elements in the range `[first, last)` with the
955
// same value, unique removes all but the first element of the group. The return
956
// value is an iterator `new_last` such that no two consecutive elements in the
957
// range `[first, new_last)` are equal.
958
//
959
// This function is stable, meaning that the relative order of elements that
960
// remained are unchanged.
961
//
962
// Only trivially destructable types are supported.
963
template <typename Iter,
964
          typename T = typename std::iterator_traits<Iter>::value_type>
965
Iter unique(ExecutionPolicy policy, Iter first, Iter last) {
966
  static_assert(std::is_convertible_v<
967
                    typename std::iterator_traits<Iter>::iterator_category,
968
                    std::random_access_iterator_tag>,
969
                "You can only parallelize RandomAccessIterator.");
970
  static_assert(std::is_trivially_destructible_v<T>,
971
                "Our simple implementation does not support types that are "
972
                "not trivially destructable.");
973
  (void)policy;
974
#if (MANIFOLD_PAR == 1)
975
  if (policy == ExecutionPolicy::Par && first != last) {
976
    Iter newSrcStart = first;
977
    // cap the maximum buffer size, proved to be beneficial for unique with huge
978
    // array size
979
    constexpr size_t MAX_BUFFER_SIZE = 1 << 16;
980
    T* tmp = new T[std::min(MAX_BUFFER_SIZE,
981
                            static_cast<size_t>(std::distance(first, last)))];
982
    auto pred = [&](size_t i) { return tmp[i] != tmp[i + 1]; };
983
    do {
984
      size_t length =
985
          std::min(MAX_BUFFER_SIZE,
986
                   static_cast<size_t>(std::distance(newSrcStart, last)));
987
      copy(policy, newSrcStart, newSrcStart + length, tmp);
988
      *first = *newSrcStart;
989
      // this is not a typo, the index i is offset by 1, so to compare an
990
      // element with its predecessor we need to compare i and i + 1.
991
      details::CopyIfScanBody body(pred, tmp + 1, first + 1);
992
      tbb::this_task_arena::isolate([&]() {
993
        tbb::parallel_scan(tbb::blocked_range<size_t>(0, length - 1), body);
994
      });
995
      first += body.get_sum() + 1;
996
      newSrcStart += length;
997
    } while (newSrcStart != last);
998
    delete[] tmp;
999
    return first;
1000
  }
1001
#endif
1002
  return std::unique(first, last);
1003
}
1004

1005
// For each group of consecutive elements in the range `[first, last)` with the
1006
// same value, unique removes all but the first element of the group. The return
1007
// value is an iterator `new_last` such that no two consecutive elements in the
1008
// range `[first, new_last)` are equal.
1009
//
1010
// This function is stable, meaning that the relative order of elements that
1011
// remained are unchanged.
1012
//
1013
// Only trivially destructable types are supported.
1014
template <typename Iter,
1015
          typename T = typename std::iterator_traits<Iter>::value_type>
1016
Iter unique(Iter first, Iter last) {
1017
  return unique(autoPolicy(first, last, 1e4), first, last);
1018
}
1019

1020
// Sort the input range `[first, last)` in ascending order.
1021
//
1022
// This function is stable, meaning that the relative order of elements that are
1023
// incomparable remains unchanged.
1024
//
1025
// Only trivially destructable types are supported.
1026
template <typename Iterator,
1027
          typename T = typename std::iterator_traits<Iterator>::value_type>
1028
void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) {
1029
  (void)policy;
1030
#if (MANIFOLD_PAR == 1)
1031
  details::SortFunctor<Iterator, T>()(policy, first, last);
1032
#else
1033
  std::stable_sort(first, last);
1034
#endif
1035
}
1036

1037
// Sort the input range `[first, last)` in ascending order.
1038
//
1039
// This function is stable, meaning that the relative order of elements that are
1040
// incomparable remains unchanged.
1041
//
1042
// Only trivially destructable types are supported.
1043
template <typename Iterator,
1044
          typename T = typename std::iterator_traits<Iterator>::value_type>
1045
void stable_sort(Iterator first, Iterator last) {
1046
  stable_sort(autoPolicy(first, last, 1e4), first, last);
1047
}
1048

1049
// Sort the input range `[first, last)` in ascending order using the comparison
1050
// function `comp`.
1051
//
1052
// This function is stable, meaning that the relative order of elements that are
1053
// incomparable remains unchanged.
1054
//
1055
// Only trivially destructable types are supported.
1056
template <typename Iterator,
1057
          typename T = typename std::iterator_traits<Iterator>::value_type,
1058
          typename Comp = decltype(std::less<T>())>
1059
void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last,
1060
                 Comp comp) {
1061
  (void)policy;
1062
#if (MANIFOLD_PAR == 1)
1063
  details::mergeSort(policy, first, last, comp);
1064
#else
1065
  std::stable_sort(first, last, comp);
1066
#endif
1067
}
1068

1069
// Sort the input range `[first, last)` in ascending order using the comparison
1070
// function `comp`.
1071
//
1072
// This function is stable, meaning that the relative order of elements that are
1073
// incomparable remains unchanged.
1074
//
1075
// Only trivially destructable types are supported.
1076
template <typename Iterator,
1077
          typename T = typename std::iterator_traits<Iterator>::value_type,
1078
          typename Comp = decltype(std::less<T>())>
1079
void stable_sort(Iterator first, Iterator last, Comp comp) {
1080
  stable_sort(autoPolicy(first, last, 1e4), first, last, comp);
1081
}
1082

1083
// `scatter` copies elements from a source range into an output array according
1084
// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
1085
// is assigned to `outputFirst[mapFirst[i - first]]`.  If the same index appears
1086
// more than once in the range `[mapFirst, mapFirst + (last - first))`, the
1087
// result is undefined.
1088
//
1089
// The map range, input range and the output range must not overlap.
1090
template <typename InputIterator1, typename InputIterator2,
1091
          typename OutputIterator>
1092
void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last,
1093
             InputIterator2 mapFirst, OutputIterator outputFirst) {
1094
  for_each(policy, countAt(0),
1095
           countAt(static_cast<size_t>(std::distance(first, last))),
1096
           [first, mapFirst, outputFirst](size_t i) {
1097
             outputFirst[mapFirst[i]] = first[i];
1098
           });
1099
}
1100

1101
// `scatter` copies elements from a source range into an output array according
1102
// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
1103
// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears
1104
// more than once in the range `[mapFirst, mapFirst + (last - first))`,
1105
// the result is undefined.
1106
//
1107
// The map range, input range and the output range must not overlap.
1108
template <typename InputIterator1, typename InputIterator2,
1109
          typename OutputIterator>
1110
void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst,
1111
             OutputIterator outputFirst) {
1112
  scatter(autoPolicy(first, last, 1e5), first, last, mapFirst, outputFirst);
1113
}
1114

1115
// `gather` copies elements from a source array into a destination range
1116
// according to a map. For each input iterator `i`
1117
// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
1118
// is assigned to `outputFirst[i - map_first]`.
1119
//
1120
// The map range, input range and the output range must not overlap.
1121
template <typename InputIterator, typename RandomAccessIterator,
1122
          typename OutputIterator>
1123
void gather(ExecutionPolicy policy, InputIterator mapFirst,
1124
            InputIterator mapLast, RandomAccessIterator inputFirst,
1125
            OutputIterator outputFirst) {
1126
  for_each(policy, countAt(0),
1127
           countAt(static_cast<size_t>(std::distance(mapFirst, mapLast))),
1128
           [mapFirst, inputFirst, outputFirst](size_t i) {
1129
             outputFirst[i] = inputFirst[mapFirst[i]];
1130
           });
1131
}
1132

1133
// `gather` copies elements from a source array into a destination range
1134
// according to a map. For each input iterator `i`
1135
// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
1136
// is assigned to `outputFirst[i - map_first]`.
1137
//
1138
// The map range, input range and the output range must not overlap.
1139
template <typename InputIterator, typename RandomAccessIterator,
1140
          typename OutputIterator>
1141
void gather(InputIterator mapFirst, InputIterator mapLast,
1142
            RandomAccessIterator inputFirst, OutputIterator outputFirst) {
1143
  gather(autoPolicy(std::distance(mapFirst, mapLast), 1e5), mapFirst, mapLast,
1144
         inputFirst, outputFirst);
1145
}
1146

1147
// Write `[0, last - first)` to the range `[first, last)`.
1148
template <typename Iterator>
1149
void sequence(ExecutionPolicy policy, Iterator first, Iterator last) {
1150
  for_each(policy, countAt(0),
1151
           countAt(static_cast<size_t>(std::distance(first, last))),
1152
           [first](size_t i) { first[i] = i; });
1153
}
1154

1155
// Write `[0, last - first)` to the range `[first, last)`.
1156
template <typename Iterator>
1157
void sequence(Iterator first, Iterator last) {
1158
  sequence(autoPolicy(first, last, 1e5), first, last);
1159
}
1160

1161
}  // namespace manifold
1162

1163
Product

Resources

Company