class tf::syclFlow
class for building a SYCL task dependency graph
Contents
Constructors, destructors, conversion operators
Public functions
- auto empty() const -> bool
- queries the emptiness of the graph
- auto num_tasks() const -> size_t
- queries the number of tasks
- void dump(std::ostream& os) const
- dumps the syclFlow graph into a DOT format through an output stream
- void clear()
- clear the associated graph
-
template<typename F, std::enable_if_t<std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr>auto on(F&& func) -> syclTask
- creates a task that launches the given command group function object
-
template<typename F, std::enable_if_t<std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr>void on(syclTask task, F&& func)
- updates the task to the given command group function object
- auto memcpy(void* tgt, const void* src, size_t bytes) -> syclTask
- creates a memcpy task that copies untyped data in bytes
- auto memset(void* ptr, int value, size_t bytes) -> syclTask
- creates a memset task that fills untyped data with a byte value
-
template<typename T>auto fill(void* ptr, const T& pattern, size_t count) -> syclTask
- creates a fill task that fills typed data with the given value
-
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>auto copy(T* target, const T* source, size_t count) -> syclTask
- creates a copy task that copies typed data from a source to a target memory block
-
template<typename... ArgsT>auto parallel_for(ArgsT && ... args) -> syclTask
- creates a kernel task
-
template<typename F>auto single_task(F&& func) -> syclTask
- invokes a SYCL kernel function using only one thread
-
template<typename I, typename C>auto for_each(I first, I last, C&& callable) -> syclTask
- applies a callable to each dereferenced element of the data array
-
template<typename I, typename C>auto for_each_index(I first, I last, I step, C&& callable) -> syclTask
- applies a callable to each index in the range with the step size
-
template<typename I, typename C, typename... S>auto transform(I first, I last, C&& callable, S... srcs) -> syclTask
- applies a callable to a source range and stores the result in a target range
-
template<typename I, typename T, typename C>auto reduce(I first, I last, T* result, C&& op) -> syclTask
- performs parallel reduction over a range of items
-
template<typename I, typename T, typename C>auto uninitialized_reduce(I first, I last, T* result, C&& op) -> syclTask
- similar to tf::syclFlow::reduce but does not assume any initial value to reduce
-
template<typename P>void offload_until(P&& predicate)
- offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
- void offload_n(size_t N)
- offloads the syclFlow and executes it the given number of times
- void offload()
- offloads the syclFlow and executes it once
- void memcpy(syclTask task, void* tgt, const void* src, size_t bytes)
- rebinds the task to a memcpy task
- void memset(syclTask task, void* ptr, int value, size_t bytes)
- rebinds the task to a memset task
-
template<typename T>void fill(syclTask task, void* ptr, const T& pattern, size_t count)
- rebinds the task to a fill task
-
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>void copy(syclTask task, T* target, const T* source, size_t count)
- rebinds the task to a copy task
-
template<typename... ArgsT>void parallel_for(syclTask task, ArgsT && ... args)
- rebinds the task to a parallel-for kernel task
-
template<typename F>void single_task(syclTask task, F&& func)
- rebinds the task to a single-threaded kernel task
Function documentation
tf:: syclFlow:: syclFlow(sycl::queue& queue)
constructs a standalone syclFlow from the given queue
A standalone syclFlow does not go through any taskflow and can be run by the caller thread using explicit offload methods (e.g., tf::syclFlow::offload).
template<typename F, std::enable_if_t<std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr>
syclTask tf:: syclFlow:: on(F&& func)
creates a task that launches the given command group function object
Template parameters | |
---|---|
F | type of command group function object |
Parameters | |
func | function object that is constructible from std::function<void(sycl::handler&)> |
Creates a task that is associated with the given command group. In SYCL, each command group function object is given a unique command group handler object to perform all the necessary work required to correctly process data on a device using a kernel.
template<typename F, std::enable_if_t<std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr>
void tf:: syclFlow:: on(syclTask task,
F&& func)
updates the task to the given command group function object
Similar to tf::syclFlow::on but operates on an existing task.
syclTask tf:: syclFlow:: memcpy(void* tgt,
const void* src,
size_t bytes)
creates a memcpy task that copies untyped data in bytes
Parameters | |
---|---|
tgt | pointer to the target memory block |
src | pointer to the source memory block |
bytes | bytes to copy |
Returns | a tf::syclTask handle |
A memcpy task transfers bytes of data from a source location src to a target location tgt. Both src and tgt may be either host or USM pointers.
syclTask tf:: syclFlow:: memset(void* ptr,
int value,
size_t bytes)
creates a memset task that fills untyped data with a byte value
Parameters | |
---|---|
ptr | pointer to the destination device memory area |
value | value to set for each byte of specified memory |
bytes | number of bytes to set |
Returns | a tf::syclTask handle |
Fills bytes of memory beginning at address ptr with value. ptr must be a USM allocation. value is interpreted as an unsigned char.
template<typename T>
syclTask tf:: syclFlow:: fill(void* ptr,
const T& pattern,
size_t count)
creates a fill task that fills typed data with the given value
Template parameters | |
---|---|
T | trivially copyable value type |
Parameters | |
ptr | pointer to the memory to fill |
pattern | pattern value to fill into the memory |
count | number of items to fill the value |
Creates a task that fills the specified memory with the specified value.
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
syclTask tf:: syclFlow:: copy(T* target,
const T* source,
size_t count)
creates a copy task that copies typed data from a source to a target memory block
Template parameters | |
---|---|
T | trivially copyable value type |
Parameters | |
target | pointer to the target memory block |
source | pointer to the source memory block |
count | number of items of type T to copy |
Creates a task that copies count items of type T from a source memory location to a target memory location.
template<typename... ArgsT>
syclTask tf:: syclFlow:: parallel_for(ArgsT && ... args)
creates a kernel task
Template parameters | |
---|---|
ArgsT | arguments types |
Parameters | |
args | arguments to forward to the parallel_for methods defined in the handler object |
Creates a kernel task from a parallel_for method through the handler object associated with a command group.
template<typename F>
syclTask tf:: syclFlow:: single_task(F&& func)
invokes a SYCL kernel function using only one thread
Template parameters | |
---|---|
F | kernel function type |
Parameters | |
func | kernel function |
Creates a task that launches the given function object using only one kernel thread.
template<typename I, typename C>
syclTask tf:: syclFlow:: for_each(I first,
I last,
C&& callable)
applies a callable to each dereferenced element of the data array
Template parameters | |
---|---|
I | iterator type |
C | callable type |
Parameters | |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
callable | a callable object to apply to the dereferenced iterator |
Returns | a tf::syclTask handle |
This method is equivalent to the parallel execution of the following loop on a GPU:
for(auto itr = first; itr != last; itr++) { callable(*itr); }
template<typename I, typename C>
syclTask tf:: syclFlow:: for_each_index(I first,
I last,
I step,
C&& callable)
applies a callable to each index in the range with the step size
Template parameters | |
---|---|
I | index type |
C | callable type |
Parameters | |
first | beginning index |
last | last index |
step | step size |
callable | the callable to apply to each element in the data array |
Returns | a tf::syclTask handle |
This method is equivalent to the parallel execution of the following loop on a GPU:
// step is positive [first, last)
for(auto i=first; i<last; i+=step) { callable(i); }
// step is negative [first, last)
for(auto i=first; i>last; i+=step) { callable(i); }
template<typename I, typename C, typename... S>
syclTask tf:: syclFlow:: transform(I first,
I last,
C&& callable,
S... srcs)
applies a callable to a source range and stores the result in a target range
Template parameters | |
---|---|
I | iterator type |
C | callable type |
S | source types |
Parameters | |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
callable | the callable to apply to each element in the range |
srcs | iterators to the source ranges |
Returns | a tf::syclTask handle |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
while (first != last) { *first++ = callable(*src1++, *src2++, *src3++, ...); }
template<typename I, typename T, typename C>
syclTask tf:: syclFlow:: reduce(I first,
I last,
T* result,
C&& op)
performs parallel reduction over a range of items
Template parameters | |
---|---|
I | input iterator type |
T | value type |
C | callable type |
Parameters | |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
result | pointer to the result with an initialized value |
op | binary reduction operator |
Returns | a tf::syclTask handle |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
while (first != last) { *result = op(*result, *first++); }
template<typename I, typename T, typename C>
syclTask tf:: syclFlow:: uninitialized_reduce(I first,
I last,
T* result,
C&& op)
similar to tf::syclFlow::reduce but does not assume any initial value to reduce
This method is equivalent to the parallel execution of the following loop on a SYCL device:
*result = *first++;  // no initial values participate in the loop
while (first != last) { *result = op(*result, *first++); }
template<typename P>
void tf:: syclFlow:: offload_until(P&& predicate)
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
Template parameters | |
---|---|
P | predicate type (a binary callable) |
Parameters | |
predicate | a binary predicate (returns true for stop) |
Repeatedly executes the present syclFlow through the given queue object until the predicate returns true.
By default, if users do not offload the syclFlow, the executor will offload it once.
void tf:: syclFlow:: offload_n(size_t N)
offloads the syclFlow and executes it the given number of times
Parameters | |
---|---|
N | number of executions |
void tf:: syclFlow:: memcpy(syclTask task,
void* tgt,
const void* src,
size_t bytes)
rebinds the task to a memcpy task
Similar to tf::syclFlow::memcpy but operates on an existing task.
void tf:: syclFlow:: memset(syclTask task,
void* ptr,
int value,
size_t bytes)
rebinds the task to a memset task
Similar to tf::syclFlow::memset but operates on an existing task.
template<typename T>
void tf:: syclFlow:: fill(syclTask task,
void* ptr,
const T& pattern,
size_t count)
rebinds the task to a fill task
Similar to tf::syclFlow::fill but operates on an existing task.
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void tf:: syclFlow:: copy(syclTask task,
T* target,
const T* source,
size_t count)
rebinds the task to a copy task
Similar to tf::syclFlow::copy but operates on an existing task.
template<typename... ArgsT>
void tf:: syclFlow:: parallel_for(syclTask task,
ArgsT && ... args)
rebinds the task to a parallel-for kernel task
Similar to tf::syclFlow::parallel_for but operates on an existing task.
template<typename F>
void tf:: syclFlow:: single_task(syclTask task,
F&& func)
rebinds the task to a single-threaded kernel task
Similar to tf::syclFlow::single_task but operates on an existing task.