VisionCpp  0.0.1
parallel_copy.hpp
Go to the documentation of this file.
1 // This file is part of VisionCpp, a lightweight C++ template library
2 // for computer vision and image processing.
3 //
4 // Copyright (C) 2016 Codeplay Software Limited. All Rights Reserved.
5 //
6 // Contact: visioncpp@codeplay.com
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 
20 /// \file parallel_copy.hpp
21 /// \brief This file contains the ParallelCopy struct, which is used to copy the
22 /// partial result of the right-hand side expression (RHS) to the (partial block
23 /// of the) left-hand side expression (LHS). The LHS is always a leaf node.
24 
25 #ifndef VISIONCPP_INCLUDE_FRAMEWORK_EXPR_TREE_POINT_OPS_PARALLEL_COPY_HPP_
26 #define VISIONCPP_INCLUDE_FRAMEWORK_EXPR_TREE_POINT_OPS_PARALLEL_COPY_HPP_
27 
28 namespace visioncpp {
29 namespace internal {
30 /// \struct ParallelCopy
31 /// \brief ParallelCopy is used to copy the partial result of the right-
32 /// hand side expression (RHS) to the (partial block of the) left-hand side
33 /// expression (LHS). The LHS is always a leaf node. It can be used for PointOP,
34 /// NeighbourOP, and GlobalNeighbourOP.
35 /// template parameters:
36 /// \tparam LHS is the output leafNode
37 /// \tparam RHS is the right-hand side expression
38 /// \tparam Cols: determines the column size of the output
39 /// \tparam Rows: determines the row size of the output
40 /// \tparam OffsetColIn: starting column offset for RHS result
41 /// \tparam OffsetRowIn: starting Row offset for RHS result
42 /// \tparam OffsetColOut: starting column offset for LHS node
43 /// \tparam OffsetRowOut: starting Row offset for LHS node
44 /// \tparam LfType: determines the type of the leafNode {Buffer2D, Buffer1D,
45 /// Host, Image}
46 /// \tparam LVL: the level of the node in the expression tree
47 
48 template <typename LHS, typename RHS, size_t Cols, size_t Rows,
49  size_t OffsetColIn, size_t OffsetRowIn, size_t OffsetColOut,
50  size_t OffsetRowOut, size_t LfType, size_t LVL>
51 struct ParallelCopy {
52  static constexpr bool has_out = true;
    // The output type and node type are taken from the LHS.
53  using OutType = typename LHS::OutType;
54  using Type = typename LHS::Type;
    // Level of this node in the expression tree (the LVL template parameter).
55  static constexpr size_t Level = LVL;
56  static constexpr size_t RThread =
    // NOTE(review): listing line 57 is not present in this view; the start of
    // the expression that `>::Value` closes below cannot be seen here.
58  RHS::RThread>::Value; // Rows;
59  static constexpr size_t CThread =
    // NOTE(review): listing line 60 is not present in this view; same gap as
    // for RThread above.
61  RHS::CThread>::Value; // Cols;
62  static constexpr size_t ND_Category = expr_category::Binary;
63  static constexpr size_t LeafType = Type::LeafType;
64  using RHSExpr = RHS;
65  using LHSExpr = LHS;
    // Rebinds this ParallelCopy to other LHS/RHS types while keeping the same
    // sizes, offsets, leaf type and level.
66  template <typename TmpLHS, typename TmpRHS>
67  using ExprExchange =
68  internal::ParallelCopy<TmpLHS, TmpRHS, Cols, Rows, OffsetColIn,
69  OffsetRowIn, OffsetColOut, OffsetRowOut, LfType,
70  LVL>;
71 
72  // Maybe this can be decided from the shape of the copy: if the source is
73  // equal to the destination, it can be passed as false. If the type is
74  // assign and neighbour op, Cols and Rows should be the same size as the
75  // Cols and Rows of the RHS output, and the start offset for the input
76  // should be 0,0.
77 
    // The operation category is inherited from the RHS expression.
78  static constexpr size_t Operation_type =
79  RHS::Operation_type; // internal::ops_category::PointOP;
80  static constexpr bool SubExpressionEvaluationNeeded =
    // NOTE(review): listing line 81 is not present in this view. The visible
    // part tests whether Cols/Rows differ from RHS::Type::Cols/Rows or an
    // input offset is non-zero, and ORs in the RHS's own flag; the extra `)`
    // on line 83 shows that test is wrapped in a term opened on the missing
    // line.
82  (Cols != RHS::Type::Cols || Rows != RHS::Type::Rows ||
83  OffsetColIn != 0 || OffsetRowIn != 0)) ||
84  RHS::SubExpressionEvaluationNeeded;
85  LHS lhs;
86  RHS rhs;
    // NOTE(review): listing line 87 is not present in this view. The
    // constructor below initialises `subexpr_execution_reseter`, whose
    // declaration is not visible -- presumably it is on the missing line;
    // confirm against the original header.
    // Stores copies of the LHS and RHS nodes.
88  ParallelCopy(LHS lhsArg, RHS rhsArg)
89  : lhs(lhsArg), rhs(rhsArg), subexpr_execution_reseter(false) {}
90 
    // Forwards the reset flag to both child nodes.
91  void reset(bool reset) {
92  lhs.reset(reset);
93  rhs.reset(reset);
    // NOTE(review): listing line 94 is not present in this view; there may be
    // one more statement here.
95  }
96 
97  /// sub_expression_evaluation
98  /// \brief This function is used to break the expression tree whenever
99  /// necessary. The decision for breaking the tree will be determined based on
100  /// the static parameter called SubExpressionEvaluationNeeded. When this is
101  /// set to true, the sub_expression_evaluation is called recursively from the
102  /// root of the tree. Each node, based on its parent's decision, will decide
103  /// to launch a kernel for itself. Also, it decides for each of its children
104  /// whether or not to launch a kernel separately.
105  /// template parameters:
106  /// \tparam ForcedToExec: a boolean value representing the decision made by
107  /// the parent of this node for launching a kernel.
108  /// \tparam LC: is the column size of local memory required by Filter2D and
109  /// DownSmplOP
110  /// \tparam LR: is the row size of local memory required by Filter2D and
111  /// DownSmplOP
112  /// \tparam LCT: is the column size of workgroup
113  /// \tparam LRT: is the row size of workgroup
114  /// function parameters:
115  /// \param dev : the selected device for executing the expression
116  /// \return the LHS leaf node (the lhs member)
117  template <bool ForcedToExec, size_t LC, size_t LR, size_t LCT, size_t LRT,
118  typename DeviceT>
119  LHS inline sub_expression_evaluation(const DeviceT &dev) {
120  // Here, again, we cannot use the general subexpr, for two reasons.
121  // First, partial assign is always the root of a tree, so it does not come
122  // here and there is no eval_expr provided for that.
123  // Secondly, even if we come here, the lhs is passed as a type and there is
124  // no intermediate type for that. We have to run it differently from the
125  // intermediate root node.
     // Evaluate the RHS subtree first; ForcedToExec is passed as false to it.
126  auto eval_sub =
127  rhs.template sub_expression_evaluation<false, LC, LR, LCT, LRT>(dev);
128  // through template instantiation it executes the kernel when it is a leaf node.
129  auto intermediate_output =
130  SubExprRes<LC, LR, LCT, LRT, 1 + LVL, decltype(eval_sub), DeviceT>::get(eval_sub,
131  dev);
     // NOTE(review): listing lines 133 and 136 are not present in this view, so
     // the type of the object handed to fuse is only partly visible. What can
     // be seen: it is constructed from (lhs, intermediate_output) and passed
     // to internal::fuse together with dev.
132  internal::fuse<LC, LR, LCT, LRT>(
134  LHS, decltype(intermediate_output), Cols, Rows, OffsetColIn,
135  OffsetRowIn, OffsetColOut, OffsetRowOut, LHS::LeafType,
137  (LHS::Level > decltype(intermediate_output)::Level), LHS,
138  decltype(intermediate_output)>::Type::Level>(
139  lhs, intermediate_output),
140  dev);
141  return lhs;
142  }
143 };
144 } // internal
145 
146 /// partial_assign function
147 /// \brief This function is used to deduce the ParallelCopy struct.
    /// The size and offset parameters (Cols, Rows, OffsetColIn, OffsetRowIn,
    /// OffsetColOut, OffsetRowOut) are given explicitly by the caller; LHS and
    /// RHS are deduced from the arguments. The resulting node uses
    /// LHS::LeafType as its LfType, and its LVL is 1 + the Level of the type
    /// chosen by StaticIf<(LHS::Level > RHS::Level), LHS, RHS>.
    /// \param lhs : the output node, passed on to the ParallelCopy constructor
    /// \param rhs : the right-hand side expression, passed on likewise
    /// \return the ParallelCopy node constructed from lhs and rhs
148 template <size_t Cols, size_t Rows, size_t OffsetColIn, size_t OffsetRowIn,
149  size_t OffsetColOut, size_t OffsetRowOut, typename LHS, typename RHS>
150 auto partial_assign(LHS lhs, RHS rhs) -> internal::ParallelCopy<
151  LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut, OffsetRowOut,
152  LHS::LeafType, 1 + internal::tools::StaticIf<(LHS::Level > RHS::Level), LHS,
153  RHS>::Type::Level> {
154  return internal::ParallelCopy<
155  LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut,
156  OffsetRowOut, LHS::LeafType,
157  1 + internal::tools::StaticIf<(LHS::Level > RHS::Level), LHS,
158  RHS>::Type::Level>(lhs, rhs);
159 }
160 } // visioncpp
161 #endif // VISIONCPP_INCLUDE_FRAMEWORK_EXPR_TREE_POINT_OPS_PARALLEL_COPY_HPP_
EnableIf< k==0, typename ElemTypeHolder< 0, Tuple< Ts... > >::type & >::type get(Tuple< Ts... > &t)
get
Definition: tuple.hpp:99
VisionCpp namespace.
Definition: sycl/device.hpp:24
auto partial_assign(LHS lhs, RHS rhs) -> internal::ParallelCopy< LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut, OffsetRowOut, LHS::LeafType, 1+internal::tools::StaticIf<(LHS::Level > RHS::Level), LHS, RHS >::Type::Level >
partial_assign function
The definition is in the ParallelCopy file.
static constexpr bool SubExpressionEvaluationNeeded
static constexpr size_t RThread
static constexpr size_t CThread
static constexpr size_t LeafType
LHS sub_expression_evaluation(const DeviceT &dev)
sub_expression_evaluation
static constexpr size_t ND_Category
ParallelCopy(LHS lhsArg, RHS rhsArg)
static constexpr size_t Level
static constexpr size_t Operation_type
The definition is in the SubExprRes file.
It is used to select one of the input types based on the Conds template parameter.
Definition: static_if.hpp:52