VTK  9.6.20260422
vtkSMPTools.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
2// SPDX-License-Identifier: BSD-3-Clause
23
24#ifndef vtkSMPTools_h
25#define vtkSMPTools_h
26
27#include "vtkCommonCoreModule.h" // For export macro
28#include "vtkObject.h"
29
31#include "vtkSMPThreadLocal.h" // For Initialized
32
33#include <cmath> // For std::ceil
34#include <functional> // For std::function
35#include <type_traits> // For std:::enable_if
36
37#ifndef DOXYGEN_SHOULD_SKIP_THIS
38namespace vtk
39{
40namespace detail
41{
42namespace smp
43{
44VTK_ABI_NAMESPACE_BEGIN
45template <typename T>
46class vtkSMPTools_Has_Initialize
47{
48 typedef char (&no_type)[1];
49 typedef char (&yes_type)[2];
50 template <typename U, void (U::*)()>
51 struct V
52 {
53 };
54 template <typename U>
55 static yes_type check(V<U, &U::Initialize>*);
56 template <typename U>
57 static no_type check(...);
58
59public:
60 static bool const value = sizeof(check<T>(nullptr)) == sizeof(yes_type);
61};
62
63template <typename T>
64class vtkSMPTools_Has_Initialize_const
65{
66 typedef char (&no_type)[1];
67 typedef char (&yes_type)[2];
68 template <typename U, void (U::*)() const>
69 struct V
70 {
71 };
72 template <typename U>
73 static yes_type check(V<U, &U::Initialize>*);
74 template <typename U>
75 static no_type check(...);
76
77public:
78 static bool const value = sizeof(check<T>(0)) == sizeof(yes_type);
79};
80
81template <typename Functor, bool Init>
82struct vtkSMPTools_FunctorInternal;
83
84template <typename Functor>
85struct vtkSMPTools_FunctorInternal<Functor, false>
86{
87 Functor& F;
88 vtkSMPTools_FunctorInternal(Functor& f)
89 : F(f)
90 {
91 }
92 void Execute(vtkIdType first, vtkIdType last) { this->F(first, last); }
93 void For(vtkIdType first, vtkIdType last, vtkIdType grain)
94 {
95 auto& SMPToolsAPI = vtkSMPToolsAPI::GetInstance();
96 SMPToolsAPI.For(first, last, grain, *this);
97 }
98 vtkSMPTools_FunctorInternal<Functor, false>& operator=(
99 const vtkSMPTools_FunctorInternal<Functor, false>&);
100 vtkSMPTools_FunctorInternal(const vtkSMPTools_FunctorInternal<Functor, false>&);
101};
102
103template <typename Functor>
104struct vtkSMPTools_FunctorInternal<Functor, true>
105{
106 Functor& F;
107 vtkSMPThreadLocal<unsigned char> Initialized;
108 vtkSMPTools_FunctorInternal(Functor& f)
109 : F(f)
110 , Initialized(0)
111 {
112 }
113 void Execute(vtkIdType first, vtkIdType last)
114 {
115 unsigned char& inited = this->Initialized.Local();
116 if (!inited)
117 {
118 this->F.Initialize();
119 inited = 1;
120 }
121 this->F(first, last);
122 }
123 void For(vtkIdType first, vtkIdType last, vtkIdType grain)
124 {
125 auto& SMPToolsAPI = vtkSMPToolsAPI::GetInstance();
126 SMPToolsAPI.For(first, last, grain, *this);
127 this->F.Reduce();
128 }
129 vtkSMPTools_FunctorInternal<Functor, true>& operator=(
130 const vtkSMPTools_FunctorInternal<Functor, true>&);
131 vtkSMPTools_FunctorInternal(const vtkSMPTools_FunctorInternal<Functor, true>&);
132};
133
134template <typename Functor>
135class vtkSMPTools_Lookup_For
136{
137 static bool const init = vtkSMPTools_Has_Initialize<Functor>::value;
138
139public:
140 typedef vtkSMPTools_FunctorInternal<Functor, init> type;
141};
142
143template <typename Functor>
144class vtkSMPTools_Lookup_For<Functor const>
145{
146 static bool const init = vtkSMPTools_Has_Initialize_const<Functor>::value;
147
148public:
149 typedef vtkSMPTools_FunctorInternal<Functor const, init> type;
150};
151
152template <typename Iterator, typename Functor, bool Init>
153struct vtkSMPTools_RangeFunctor;
154
155template <typename Iterator, typename Functor>
156struct vtkSMPTools_RangeFunctor<Iterator, Functor, false>
157{
158 Functor& F;
159 Iterator& Begin;
160 vtkSMPTools_RangeFunctor(Iterator& begin, Functor& f)
161 : F(f)
162 , Begin(begin)
163 {
164 }
165 void operator()(vtkIdType first, vtkIdType last)
166 {
167 Iterator itFirst(Begin);
168 std::advance(itFirst, first);
169 Iterator itLast(itFirst);
170 std::advance(itLast, last - first);
171 this->F(itFirst, itLast);
172 }
173};
174
175template <typename Iterator, typename Functor>
176struct vtkSMPTools_RangeFunctor<Iterator, Functor, true>
177{
178 Functor& F;
179 Iterator& Begin;
180 vtkSMPTools_RangeFunctor(Iterator& begin, Functor& f)
181 : F(f)
182 , Begin(begin)
183 {
184 }
185 void Initialize() { this->F.Initialize(); }
186 void operator()(vtkIdType first, vtkIdType last)
187 {
188 Iterator itFirst(Begin);
189 std::advance(itFirst, first);
190 Iterator itLast(itFirst);
191 std::advance(itLast, last - first);
192 this->F(itFirst, itLast);
193 }
194 void Reduce() { this->F.Reduce(); }
195};
196
197template <typename Iterator, typename Functor>
198class vtkSMPTools_Lookup_RangeFor
199{
200 static bool const init = vtkSMPTools_Has_Initialize<Functor>::value;
201
202public:
203 typedef vtkSMPTools_RangeFunctor<Iterator, Functor, init> type;
204};
205
206template <typename Iterator, typename Functor>
207class vtkSMPTools_Lookup_RangeFor<Iterator, Functor const>
208{
209 static bool const init = vtkSMPTools_Has_Initialize_const<Functor>::value;
210
211public:
212 typedef vtkSMPTools_RangeFunctor<Iterator, Functor const, init> type;
213};
214
215template <typename T>
216using resolvedNotInt = typename std::enable_if<!std::is_integral<T>::value, void>::type;
217VTK_ABI_NAMESPACE_END
218} // namespace smp
219} // namespace detail
220} // namespace vtk
221#endif // DOXYGEN_SHOULD_SKIP_THIS
222
223VTK_ABI_NAMESPACE_BEGIN
224class VTKCOMMONCORE_EXPORT vtkSMPTools
225{
226public:
228
237 template <typename Functor>
238 static void For(vtkIdType first, vtkIdType last, vtkIdType grain, Functor& f)
239 {
240 typename vtk::detail::smp::vtkSMPTools_Lookup_For<Functor>::type fi(f);
241 fi.For(first, last, grain);
242 }
243
244 template <typename Functor>
245 static void For(vtkIdType first, vtkIdType last, vtkIdType grain, Functor const& f)
246 {
247 typename vtk::detail::smp::vtkSMPTools_Lookup_For<Functor const>::type fi(f);
248 fi.For(first, last, grain);
249 }
250
251
253
262 template <typename Functor>
263 static void For(vtkIdType first, vtkIdType last, Functor& f)
264 {
265 vtkSMPTools::For(first, last, 0, f);
266 }
267
268 template <typename Functor>
269 static void For(vtkIdType first, vtkIdType last, Functor const& f)
270 {
271 vtkSMPTools::For(first, last, 0, f);
272 }
273
274
276
311 template <typename Iter, typename Functor>
312 static vtk::detail::smp::resolvedNotInt<Iter> For(
313 Iter begin, Iter end, vtkIdType grain, Functor& f)
314 {
315 vtkIdType size = std::distance(begin, end);
316 typename vtk::detail::smp::vtkSMPTools_Lookup_RangeFor<Iter, Functor>::type fi(begin, f);
317 vtkSMPTools::For(0, size, grain, fi);
318 }
319
320 template <typename Iter, typename Functor>
321 static vtk::detail::smp::resolvedNotInt<Iter> For(
322 Iter begin, Iter end, vtkIdType grain, Functor const& f)
323 {
324 vtkIdType size = std::distance(begin, end);
325 typename vtk::detail::smp::vtkSMPTools_Lookup_RangeFor<Iter, Functor const>::type fi(begin, f);
326 vtkSMPTools::For(0, size, grain, fi);
327 }
328
329
331
364 template <typename Iter, typename Functor>
365 static vtk::detail::smp::resolvedNotInt<Iter> For(Iter begin, Iter end, Functor& f)
366 {
367 vtkSMPTools::For(begin, end, 0, f);
368 }
369
370 template <typename Iter, typename Functor>
371 static vtk::detail::smp::resolvedNotInt<Iter> For(Iter begin, Iter end, Functor const& f)
372 {
373 vtkSMPTools::For(begin, end, 0, f);
374 }
375
376
380 static const char* GetBackend();
381
393 static bool SetBackend(const char* backend);
394
410 static void Initialize(int numThreads = 0);
411
419
427
439 static void SetNestedParallelism(bool isNested);
440
445 static bool GetNestedParallelism();
446
450 static bool IsParallelScope();
451
456 static bool GetSingleThread();
457
465 struct Config
466 {
469 bool NestedParallelism = false;
470
471 Config() = default;
472 Config(int maxNumberOfThreads)
473 : MaxNumberOfThreads(maxNumberOfThreads)
474 {
475 }
476 Config(std::string backend)
477 : Backend(backend)
478 {
479 }
480 Config(bool nestedParallelism)
481 : NestedParallelism(nestedParallelism)
482 {
483 }
484 Config(int maxNumberOfThreads, std::string backend, bool nestedParallelism)
485 : MaxNumberOfThreads(maxNumberOfThreads)
486 , Backend(backend)
487 , NestedParallelism(nestedParallelism)
488 {
489 }
490#ifndef DOXYGEN_SHOULD_SKIP_THIS
492 : MaxNumberOfThreads(API.GetInternalDesiredNumberOfThread())
493 , Backend(API.GetBackend())
494 , NestedParallelism(API.GetNestedParallelism())
495 {
496 }
497#endif // DOXYGEN_SHOULD_SKIP_THIS
498 };
499
522 static constexpr vtkIdType THRESHOLD = 100000;
523
535 template <typename T>
536 static void LocalScope(Config const& config, T&& lambda)
537 {
539 SMPToolsAPI.LocalScope<vtkSMPTools::Config>(config, lambda);
540 }
541
557 template <typename InputIt, typename OutputIt, typename Functor>
558 static void Transform(InputIt inBegin, InputIt inEnd, OutputIt outBegin, Functor transform)
559 {
561 SMPToolsAPI.Transform(inBegin, inEnd, outBegin, transform);
562 }
563
580 template <typename InputIt1, typename InputIt2, typename OutputIt, typename Functor>
581 static void Transform(
582 InputIt1 inBegin1, InputIt1 inEnd, InputIt2 inBegin2, OutputIt outBegin, Functor transform)
583 {
585 SMPToolsAPI.Transform(inBegin1, inEnd, inBegin2, outBegin, transform);
586 }
587
602 template <typename Iterator, typename T>
603 static void Fill(Iterator begin, Iterator end, const T& value)
604 {
606 SMPToolsAPI.Fill(begin, end, value);
607 }
608
614 template <typename RandomAccessIterator>
615 static void Sort(RandomAccessIterator begin, RandomAccessIterator end)
616 {
618 SMPToolsAPI.Sort(begin, end);
619 }
620
627 template <typename RandomAccessIterator, typename Compare>
628 static void Sort(RandomAccessIterator begin, RandomAccessIterator end, Compare comp)
629 {
631 SMPToolsAPI.Sort(begin, end, comp);
632 }
633
646 template <typename RandomAccessIterator, typename ValueType>
647 static ValueType ExclusiveScan(
648 RandomAccessIterator begin, RandomAccessIterator end, ValueType init);
649}; // vtkSMPTools
650
651//------------------------------------------------------------------------------
652// This templated method is defined here in vtkSMPTools.h, rather than in the
653// backend implementation classes, because it leverages high-level
654// vtkSMPTools methods (e.g., vtkSMPTools::For()) -- i.e., scan is not yet
655// implemented in the backend classes. This may change as methods such as
656// std::exclusive_scan become stable, and universally support parallel
657// implementations. Further, this implementation has the benefit that it
658// performs the scan operation in-place, not all backends (at this time)
659// efficiently support in-place scans. (TODO: currently GCC does not perform
660// in-place scan correctly [this is a known bug, fixed in GCC versions 14.3
661// and 13.4 see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108236].
662// Eventually, the sequential loops that follow below should be replaced with
663// std::exclusive_scan().)
664template <typename RandomAccessIterator, typename ValueType>
666 RandomAccessIterator begin, RandomAccessIterator end, ValueType init)
667{
668 // Compute the number of elements in the container.
669 typename std::iterator_traits<RandomAccessIterator>::difference_type num =
670 std::distance(begin, end);
671
672 // Capture the last value before it is overwritten. It will be used to compute
673 // the return value.
674 ValueType retVal = *(end - 1);
675
676 // It's best to perform a sequential scan for "smallish" data.
677 if (vtkSMPTools::THRESHOLD > num)
678 {
679 ValueType val, sum = init;
680 for (auto iter = begin; iter != end; ++iter)
681 {
682 val = *iter;
683 *iter = sum;
684 sum += val;
685 }
686 }
687
688 // Otherwise, threaded computation
689 else
690 {
691 // Perform the parallel scan using two vtkSMPTools::For() passes. The
692 // first for loop sums values across batches of elements. Then a
693 // subsequent sequential summation across the resulting batches
694 // determines the initial, offset sum for each batch. Finally the second
695 // vtkSMPTools::For loop updates the sums across all batches. These
696 // operations are performed in-place. The number of batches is set to
697 // a small multiple of the number of threads. Empirically, a multiple
698 // of 2-4 times the number of threads works well.
699 int numBatches = 4 * vtkSMPTools::GetEstimatedNumberOfThreads();
700 std::vector<ValueType> batchOffsets(numBatches);
701 int batchSize = static_cast<int>(std::ceil(static_cast<double>(num) / numBatches));
702
703 // First pass: sum across batches.
704 vtkSMPTools::For(0, numBatches,
705 [&](int batchId, vtkIdType endBatchId)
706 {
707 auto beginBatch = begin + (batchId * batchSize);
708 auto endBatch = begin + (endBatchId * batchSize);
709 endBatch = (endBatch > end ? end : endBatch);
710
711 ValueType val, sum{}; // sum initialized to 0 (built-in type); or default constructed
712 for (auto iter = beginBatch; iter < endBatch; ++iter)
713 {
714 val = *iter;
715 *iter = sum;
716 sum += val;
717 }
718 batchOffsets[batchId] = sum;
719 }); // end lambda
720
721 // Perform a sequential, in-place prefix sum across each batch to create
722 // the batch offsets. The initial value of the scan, init, is added in
723 // here.
724 ValueType val, count = init;
725 for (int batchNum = 0; batchNum < numBatches; ++batchNum)
726 {
727 val = batchOffsets[batchNum];
728 batchOffsets[batchNum] = count;
729 count += val;
730 }
731
732 // Now add batch offsets to all entries to produce the prefix sum.
733 vtkSMPTools::For(0, numBatches,
734 [&](int batchId, vtkIdType endBatchId)
735 {
736 auto beginBatch = begin + (batchId * batchSize);
737 auto endBatch = begin + (endBatchId * batchSize);
738 endBatch = (endBatch > end ? end : endBatch);
739
740 ValueType offset = batchOffsets[batchId];
741 for (auto iter = beginBatch; iter < endBatch; ++iter)
742 {
743 *iter += offset;
744 }
745 }); // end lambda
746 } // threaded scan
747
748 return (retVal += *(end - 1));
749} // ExclusiveScan()
750
751VTK_ABI_NAMESPACE_END
752#endif
753// VTK-HeaderTest-Exclude: vtkSMPTools.h
T & Local()
This needs to be called mainly within a threaded execution path.
A set of parallel (multi-threaded) utility functions.
static void Transform(InputIt1 inBegin1, InputIt1 inEnd, InputIt2 inBegin2, OutputIt outBegin, Functor transform)
A convenience method for transforming data.
static bool GetSingleThread()
Returns true if the given thread is specified thread for single scope.
static vtk::detail::smp::resolvedNotInt< Iter > For(Iter begin, Iter end, Functor const &f)
Execute a for operation in parallel.
static void Initialize(int numThreads=0)
/!
static bool SetBackend(const char *backend)
/!
static bool IsParallelScope()
Return true if it is called from a parallel scope.
static void Transform(InputIt inBegin, InputIt inEnd, OutputIt outBegin, Functor transform)
A convenience method for transforming data.
static int GetEstimatedNumberOfThreads()
Get the estimated number of threads being used by the backend.
static vtk::detail::smp::resolvedNotInt< Iter > For(Iter begin, Iter end, Functor &f)
Execute a for operation in parallel.
static void SetNestedParallelism(bool isNested)
/!
static vtk::detail::smp::resolvedNotInt< Iter > For(Iter begin, Iter end, vtkIdType grain, Functor &f)
Execute a for operation in parallel.
static void Sort(RandomAccessIterator begin, RandomAccessIterator end)
A convenience method for sorting data.
static void Fill(Iterator begin, Iterator end, const T &value)
A convenience method for filling data.
static void For(vtkIdType first, vtkIdType last, Functor const &f)
Execute a for operation in parallel.
static void For(vtkIdType first, vtkIdType last, Functor &f)
Execute a for operation in parallel.
static vtk::detail::smp::resolvedNotInt< Iter > For(Iter begin, Iter end, vtkIdType grain, Functor const &f)
Execute a for operation in parallel.
static ValueType ExclusiveScan(RandomAccessIterator begin, RandomAccessIterator end, ValueType init)
A convenience method for computing the exclusive scan / prefix sum.
static void For(vtkIdType first, vtkIdType last, vtkIdType grain, Functor const &f)
Execute a for operation in parallel.
static void Sort(RandomAccessIterator begin, RandomAccessIterator end, Compare comp)
A convenience method for sorting data.
static bool GetNestedParallelism()
Get true if the nested parallelism is enabled.
static void LocalScope(Config const &config, T &&lambda)
/!
static const char * GetBackend()
Get the backend in use.
static constexpr vtkIdType THRESHOLD
This is an empirically determined threshold used by various cases to switch between serial and thread...
static void For(vtkIdType first, vtkIdType last, vtkIdType grain, Functor &f)
Execute a for operation in parallel.
static int GetEstimatedDefaultNumberOfThreads()
Get the estimated number of threads being used by the backend by default.
static vtkSMPToolsAPI & GetInstance()
@ value
Definition vtkX3D.h:221
@ type
Definition vtkX3D.h:517
Specialization of tuple ranges and iterators for vtkAOSDataArrayTemplate.
Structure used to specify configuration for LocalScope() method.
Config(std::string backend)
Config(int maxNumberOfThreads)
std::string Backend
Config(bool nestedParallelism)
Config(int maxNumberOfThreads, std::string backend, bool nestedParallelism)
int vtkIdType
Definition vtkType.h:363