VisionCpp  0.0.1
eval_assign_partial.hpp
Go to the documentation of this file.
1 // This file is part of VisionCpp, a lightweight C++ template library
2 // for computer vision and image processing.
3 //
4 // Copyright (C) 2016 Codeplay Software Limited. All Rights Reserved.
5 //
6 // Contact: visioncpp@codeplay.com
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 
20 /// \file eval_assign_partial.hpp
21 /// This file contains the partial specialisations of the Evaluator struct for
22 /// the ParallelCopy (partial assign) expression when it is the root of the expression tree.
23 
24 #ifndef VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_ASSIGN_EVAL_ASSIGN_PARTIAL_HPP_
25 #define VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_ASSIGN_EVAL_ASSIGN_PARTIAL_HPP_
26 
27 namespace visioncpp {
28 namespace internal {
29 /// \brief Partial specialisation of the Evaluator when the expression is a
30 /// ParallelCopy expression and the ops_category is PointOP.
///
/// eval() iterates i over [0, LC) in steps of cOffset.cLRng and j over [0, LR)
/// in steps of cOffset.rLRng, starting from (cOffset.g_c, cOffset.g_r). For
/// every position that lies inside both the RHS extent (shifted by
/// OffsetColIn / OffsetRowIn) and the LHS extent (shifted by OffsetColOut /
/// OffsetRowOut) it evaluates the RHS with eval_point and stores the converted
/// value through the pointer of the LHS accessor.
///
/// NOTE(review): this listing jumps from line 41 to line 43, so the remainder
/// of the eval() parameter list (the declaration of `t`) is not visible here.
31 template <size_t OutputIndex, size_t Offset, size_t LC, size_t LR, typename LHS,
32  typename RHS, size_t Cols, size_t Rows, size_t OffsetColIn,
33  size_t OffsetRowIn, size_t OffsetColOut, size_t OffsetRowOut,
34  size_t LfType, size_t LVL, typename Loc, typename... Params>
35 struct Evaluator<ops_category::PointOP, OutputIndex, Offset, LC, LR,
36  ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
37  OffsetColOut, OffsetRowOut, LfType, LVL>,
38  Loc, Params...> {
 /// The ParallelCopy expression type this specialisation evaluates.
39  using Expr = ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
40  OffsetColOut, OffsetRowOut, LfType, LVL>;
 /// \brief Copies the point-wise evaluated RHS into the LHS.
 /// \param cOffset location object; g_c, g_r, cLRng and rLRng are read and
 /// pointOp_gc / pointOp_gr are written before each RHS evaluation.
41  static inline void eval(Loc &cOffset,
43  using RHS_Eval_Expr = EvalExpr<RHS, Loc, Params...>;
44  using LHS_Eval_Expr = EvalExpr<LHS, Loc, Params...>;
45  // Destination element type, obtained from MemoryTrait using Expr::LeafType and the type of the first element of t.
46  using ElementType =
47  typename MemoryTrait<Expr::LeafType,
48  decltype(tools::tuple::get<0>(t))>::Type;
49  // Column loop. NOTE(review): i and j are int while LC and LR are size_t (signed/unsigned comparison).
50  for (int i = 0; i < LC; i += cOffset.cLRng) {
 // Skip columns that fall outside either the RHS (input) or the LHS (output).
51  if (((cOffset.g_c + OffsetColIn + i) < RHS::Type::Cols) &&
52  ((cOffset.g_c + OffsetColOut + i) < LHS::Type::Cols)) {
53  for (int j = 0; j < LR; j += cOffset.rLRng) {
 // Same bounds check for the row coordinate.
54  if (((cOffset.g_r + OffsetRowIn + j) < RHS::Type::Rows) &&
55  ((cOffset.g_r + OffsetRowOut + j) < LHS::Type::Rows)) {
 // Record the input coordinates in cOffset before eval_point is called with it.
56  cOffset.pointOp_gc = cOffset.g_c + i + OffsetColIn;
57  cOffset.pointOp_gr = cOffset.g_r + j + OffsetRowIn;
58  // The destination index is computed from the output coordinates and the LHS dimensions.
59  LHS_Eval_Expr::get_accessor(t).get_pointer()[calculate_index(
60  cOffset.g_c + i + OffsetColOut, cOffset.g_r + j + OffsetRowOut,
61  LHS::Type::Cols, LHS::Type::Rows)] =
62  tools::convert<ElementType>(
63  RHS_Eval_Expr::eval_point(cOffset, t));
64  }
65  }
66  }
67  }
68  }
69 };
70 
71 /// \brief Partial specialisation of the Evaluator when the expression is a
72 /// ParallelCopy expression and the ops_category is NeighbourOP.
///
/// eval() first evaluates the RHS through eval_neighbour, calls
/// cOffset.global_barrier(), and then copies from the pointer of the returned
/// object (rhs_acc2) into the pointer of the LHS accessor. The source is
/// indexed with the local coordinates (l_c, l_r); the destination is indexed
/// with rescaled global coordinates plus OffsetColOut / OffsetRowOut.
///
/// OffsetColIn / OffsetRowIn are not used in the visible body of this
/// specialisation.
///
/// NOTE(review): this listing skips lines 84 and 96, so the remainder of the
/// eval() parameter list (the declaration of `t`) and the right-hand operand of
/// the isLocal comparison are not visible here.
73 template <size_t OutputIndex, size_t Offset, size_t LC, size_t LR, typename LHS,
74  typename RHS, size_t Cols, size_t Rows, size_t OffsetColIn,
75  size_t OffsetRowIn, size_t OffsetColOut, size_t OffsetRowOut,
76  size_t LfType, size_t LVL, typename Loc, typename... Params>
77 struct Evaluator<ops_category::NeighbourOP, OutputIndex, Offset, LC, LR,
78  ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
79  OffsetColOut, OffsetRowOut, LfType, LVL>,
80  Loc, Params...> {
 /// The ParallelCopy expression type this specialisation evaluates.
81  using Expr = ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
82  OffsetColOut, OffsetRowOut, LfType, LVL>;
 /// \brief Evaluates the RHS as a neighbour operation and copies it to the LHS.
 /// \param cOffset location object; l_c, l_r, g_c, g_r, cLRng and rLRng are
 /// read and global_barrier() is called on it.
83  static inline void eval(Loc &cOffset,
85  using RHS_Eval_Expr = EvalExpr<RHS, Loc, Params...>;
86  using LHS_Eval_Expr = EvalExpr<LHS, Loc, Params...>;
87  // Compile-time ratios between the thread counts and the dimensions of this expression and of its RHS.
88  constexpr size_t LC_Ratio = Expr::CThread / Cols;
89  constexpr size_t LR_Ratio = Expr::RThread / Rows;
90  constexpr size_t RHS_LC_Ratio = RHS::CThread / RHS::Type::Cols;
91  constexpr size_t RHS_LR_Ratio = RHS::RThread / RHS::Type::Rows;
 // Evaluate the RHS subtree; the returned object is the source of the copy below.
92  auto rhs_acc2 = EvalExpr<RHS, Loc, Params...>::template eval_neighbour<
93  false, 0, 0, 0, 0, Offset, OutputIndex, LC, LR>(cOffset, t);
 // isLocal is derived from the scope trait of rhs_acc2's type.
 // NOTE(review): the value it is compared against is on listing line 96,
 // which is missing here - presumably ScopeType::Local; confirm.
94  constexpr bool isLocal =
95  Trait<typename tools::RemoveAll<decltype(rhs_acc2)>::Type>::scope ==
 // The barrier is issued after the RHS evaluation and before any read of rhs_acc below.
97  cOffset.global_barrier();
 // NOTE(review): rhs_acc1 and lhs_acc1 are not used anywhere in the visible body.
98  auto rhs_acc1 = RHS_Eval_Expr::get_accessor(t);
99  auto lhs_acc1 = LHS_Eval_Expr::get_accessor(t);
100  auto rhs_acc = rhs_acc2.get_pointer();
101  auto lhs_acc = LHS_Eval_Expr::get_accessor(t).get_pointer();
 // The loops below mix LC_Ratio / LR_Ratio with RHS_LC_Ratio, which is only
 // valid because this assertion forces the RHS ratios to equal the
 // expression's own ratios.
102  static_assert(RHS_LR_Ratio == LR_Ratio && RHS_LC_Ratio == LC_Ratio,
103  "You made a programing mistake. The kernel must break when "
104  "the two are not equal");
 // Only invocations whose local coordinates fall inside the ratio-reduced
 // local range take part in the copy.
105  if ((cOffset.l_c < (cOffset.cLRng / LC_Ratio)) &&
106  (cOffset.l_r < (cOffset.rLRng / LR_Ratio))) {
 // Divide the global coordinate minus the local coordinate by the ratio,
 // then add the local coordinate back.
107  size_t g_c = ((cOffset.g_c - cOffset.l_c) / LC_Ratio) + cOffset.l_c;
108  size_t g_r = ((cOffset.g_r - cOffset.l_r) / LR_Ratio) + cOffset.l_r;
109  // NOTE(review): the column index i is int while the row index j is size_t.
110  for (int i = 0; i < LC / LC_Ratio; i += (cOffset.cLRng / LC_Ratio)) {
 // get_compare performs the RHS-side bounds check (the column check uses
 // RHS_LC_Ratio, the row check uses LR_Ratio); the second condition keeps
 // the output column inside the LHS.
111  if (get_compare<isLocal, LC / RHS_LC_Ratio, RHS::Type::Cols>(
112  cOffset.l_c, i, g_c) &&
113  (g_c + i + OffsetColOut < LHS::Type::Cols)) {
114  for (size_t j = 0; j < LR / LR_Ratio;
115  j += (cOffset.rLRng / LR_Ratio)) {
116  if (get_compare<isLocal, LR / LR_Ratio, RHS::Type::Rows>(
117  cOffset.l_r, j, g_r) &&
118  (g_r + j + OffsetRowOut < LHS::Type::Rows)) {
 // Destination: output coordinates within the LHS dimensions.
 // Source: local coordinates within an (LC / LC_Ratio) x
 // (LR / LR_Ratio) extent.
119  lhs_acc[calculate_index(g_c + i + OffsetColOut,
120  g_r + j + OffsetRowOut, LHS::Type::Cols,
121  LHS::Type::Rows)] =
122  rhs_acc[calculate_index(cOffset.l_c + i, cOffset.l_r + j,
123  LC / LC_Ratio, LR / LR_Ratio)];
124  }
125  }
126  }
127  }
128  }
129  }
130 };
131 
132 /// \brief Partial specialisation of the Evaluator when the expression
133 /// is a ParallelCopy expression and the ops_category is
134 /// GlobalNeighbourOP.
///
/// eval() first evaluates the RHS through eval_global_neighbour and then
/// copies element by element from the pointer of the RHS accessor to the
/// pointer of the LHS accessor. It iterates i over [0, LC) in steps of
/// cOffset.cLRng and j over [0, LR) in steps of cOffset.rLRng, and skips
/// positions outside the RHS extent (shifted by OffsetColIn / OffsetRowIn) or
/// the LHS extent (shifted by OffsetColOut / OffsetRowOut).
///
/// NOTE(review): this listing jumps from line 146 to line 148, so the
/// remainder of the eval() parameter list (the declaration of `t`) is not
/// visible here.
135 
136 template <size_t OutputIndex, size_t Offset, size_t LC, size_t LR, typename LHS,
137  typename RHS, size_t Cols, size_t Rows, size_t OffsetColIn,
138  size_t OffsetRowIn, size_t OffsetColOut, size_t OffsetRowOut,
139  size_t LfType, size_t LVL, typename Loc, typename... Params>
140 struct Evaluator<ops_category::GlobalNeighbourOP, OutputIndex, Offset, LC, LR,
141  ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
142  OffsetColOut, OffsetRowOut, LfType, LVL>,
143  Loc, Params...> {
 /// The ParallelCopy expression type this specialisation evaluates.
144  using Expr = ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
145  OffsetColOut, OffsetRowOut, LfType, LVL>;
 /// \brief Evaluates the RHS as a global neighbour operation and copies it to
 /// the LHS.
 /// \param cOffset location object; g_c, g_r, cLRng and rLRng are read and
 /// pointOp_gc / pointOp_gr are written for every copied element.
146  static inline void eval(Loc &cOffset,
148  // The RHS is evaluated this way in order to avoid creating the last shared
149  // memory for the RHS child of the root.
150  using RHS_Eval_Expr = EvalExpr<RHS, Loc, Params...>;
151  using LHS_Eval_Expr = EvalExpr<LHS, Loc, Params...>;
152  EvalExpr<RHS, Loc, Params...>::template eval_global_neighbour<
153  true, Offset, OutputIndex, LC, LR>(cOffset, t);
 // After the RHS has been evaluated, both sides are accessed through the
 // pointers of their accessors.
154  auto rhs_acc = RHS_Eval_Expr::get_accessor(t).get_pointer();
155  auto lhs_acc = LHS_Eval_Expr::get_accessor(t).get_pointer();
156  // NOTE(review): i and j are int while LC and LR are size_t (signed/unsigned comparison).
157  for (int i = 0; i < LC; i += cOffset.cLRng) {
 // Skip columns that fall outside either the RHS (input) or the LHS (output).
158  if (((cOffset.g_c + OffsetColIn + i) < RHS::Type::Cols) &&
159  ((cOffset.g_c + OffsetColOut + i) < LHS::Type::Cols)) {
160  for (int j = 0; j < LR; j += cOffset.rLRng) {
 // Same bounds check for the row coordinate.
161  if (((cOffset.g_r + OffsetRowIn + j) < RHS::Type::Rows) &&
162  ((cOffset.g_r + OffsetRowOut + j) < LHS::Type::Rows)) {
 // Record the input coordinates in cOffset.
163  cOffset.pointOp_gc = cOffset.g_c + i + OffsetColIn;
164  cOffset.pointOp_gr = cOffset.g_r + j + OffsetRowIn;
 // Both indices are computed inline as col + Cols * row instead of
 // going through calculate_index.
165  lhs_acc[(cOffset.g_c + i + OffsetColOut) +
166  (LHS::Type::Cols * (cOffset.g_r + j + OffsetRowOut))] =
167  rhs_acc[(cOffset.g_c + i + OffsetColIn) +
168  (RHS::Type::Cols * (cOffset.g_r + j + OffsetRowIn))];
169  }
170  }
171  }
172  }
173  }
174 };
175 } // internal
176 } // visioncpp
177 #endif // VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_ASSIGN_EVAL_ASSIGN_PARTIAL_HPP_
static size_t calculate_index(size_t c, size_t r, size_t cols, size_t rows)
function calculate_index
static constexpr ScopeType Local
VisionCpp namespace.
Definition: sycl/device.hpp:24
eval_global_neighbour function:
Definition: evaluator.hpp:195
the root of the expression tree.
Definition: evaluator.hpp:195
This class is used to determine the ElementType of accessor template parameters.
The definition is in ParallelCopy file.
This struct is used to trait the value type inside the accessor.
Definition: evaluator.hpp:89
These methods are used to remove all the & const and * from a type.
The tuple is a fixed-size collection of heterogeneous values.
Definition: tuple.hpp:48