24 #ifndef VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_ASSIGN_EVAL_ASSIGN_PARTIAL_HPP_
25 #define VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_ASSIGN_EVAL_ASSIGN_PARTIAL_HPP_
// Partial specialisation whose eval() copies a region of RHS into LHS by
// evaluating the RHS expression one point at a time (it calls eval_point).
// NOTE(review): the struct header line is missing from this excerpt; this is
// presumably the PointOP Evaluator specialisation for ParallelCopy - confirm
// against the full header.
//
// Template parameters visible here:
//   LC, LR                      - upper bounds of the column / row loops below.
//   OffsetColIn, OffsetRowIn    - added to the coordinate used to read RHS.
//   OffsetColOut, OffsetRowOut  - added to the coordinate used to write LHS.
//   Loc                         - type of cOffset, the per-work-item state.
//   Params...                   - forwarded to EvalExpr.
31 template <
size_t OutputIndex,
size_t Offset,
size_t LC,
size_t LR,
typename LHS,
32 typename RHS,
size_t Cols,
size_t Rows,
size_t OffsetColIn,
33 size_t OffsetRowIn,
size_t OffsetColOut,
size_t OffsetRowOut,
34 size_t LfType,
size_t LVL,
typename Loc,
typename... Params>
36 ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
37 OffsetColOut, OffsetRowOut, LfType, LVL>,
40 OffsetColOut, OffsetRowOut, LfType, LVL>;
// eval: cOffset is taken by non-const reference because the loop body
// overwrites cOffset.pointOp_gc / cOffset.pointOp_gr before each point read.
41 static inline void eval(Loc &cOffset,
// Shorthand for the evaluators of the right-hand and left-hand sub-expressions.
43 using RHS_Eval_Expr =
EvalExpr<RHS, Loc, Params...>;
44 using LHS_Eval_Expr =
EvalExpr<LHS, Loc, Params...>;
// Tail of the ElementType alias (its first lines are not in this excerpt);
// it is derived from the type of the first element of tuple t.
48 decltype(tools::tuple::get<0>(t))>::Type;
// Column loop: starts at 0 and advances by cOffset.cLRng until LC is reached.
// NOTE(review): i is int while LC is size_t, so the comparison mixes signed
// and unsigned operands.
50 for (
int i = 0; i < LC; i += cOffset.cLRng) {
// Skip columns that fall outside either the RHS (read) or LHS (write) width.
51 if (((cOffset.g_c + OffsetColIn + i) < RHS::Type::Cols) &&
52 ((cOffset.g_c + OffsetColOut + i) < LHS::Type::Cols)) {
// Row loop: same pattern with cOffset.rLRng as the step and LR as the bound.
53 for (
int j = 0; j < LR; j += cOffset.rLRng) {
// Skip rows that fall outside either the RHS (read) or LHS (write) height.
54 if (((cOffset.g_r + OffsetRowIn + j) < RHS::Type::Rows) &&
55 ((cOffset.g_r + OffsetRowOut + j) < LHS::Type::Rows)) {
// Store the input coordinate in cOffset; presumably eval_point reads these
// two fields to know which RHS element to evaluate - TODO confirm.
56 cOffset.pointOp_gc = cOffset.g_c + i + OffsetColIn;
57 cOffset.pointOp_gr = cOffset.g_r + j + OffsetRowIn;
// Destination element: indexed from the output coordinate and the LHS
// dimensions (the accessor and index-function tokens are missing from this
// excerpt). The RHS point value is converted to ElementType before the store.
60 cOffset.g_c + i + OffsetColOut, cOffset.g_r + j + OffsetRowOut,
61 LHS::Type::Cols, LHS::Type::Rows)] =
62 tools::convert<ElementType>(
63 RHS_Eval_Expr::eval_point(cOffset, t));
// Partial specialisation whose eval() first evaluates the RHS through
// eval_neighbour and then copies the result into LHS.
// NOTE(review): the struct header line is missing from this excerpt; this is
// presumably the NeighbourOP Evaluator specialisation for ParallelCopy -
// confirm against the full header.
//
// Template parameters visible here:
//   Offset, OutputIndex, LC, LR - forwarded to eval_neighbour; LC and LR also
//                                 bound the copy loops below.
//   Cols, Rows                  - divisors used for LC_Ratio / LR_Ratio.
//   OffsetColOut, OffsetRowOut  - added to the coordinate used to write LHS.
73 template <
size_t OutputIndex,
size_t Offset,
size_t LC,
size_t LR,
typename LHS,
74 typename RHS,
size_t Cols,
size_t Rows,
size_t OffsetColIn,
75 size_t OffsetRowIn,
size_t OffsetColOut,
size_t OffsetRowOut,
76 size_t LfType,
size_t LVL,
typename Loc,
typename... Params>
78 ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
79 OffsetColOut, OffsetRowOut, LfType, LVL>,
82 OffsetColOut, OffsetRowOut, LfType, LVL>;
// eval: cOffset is the per-work-item state; t is the tuple handed to EvalExpr.
83 static inline void eval(Loc &cOffset,
// Shorthand for the evaluators of the right-hand and left-hand sub-expressions.
85 using RHS_Eval_Expr =
EvalExpr<RHS, Loc, Params...>;
86 using LHS_Eval_Expr =
EvalExpr<LHS, Loc, Params...>;
// Compile-time ratios of thread counts to sizes, for this expression and for
// the RHS. They are required to match by the static_assert further down.
88 constexpr
size_t LC_Ratio = Expr::CThread / Cols;
89 constexpr
size_t LR_Ratio = Expr::RThread / Rows;
90 constexpr
size_t RHS_LC_Ratio = RHS::CThread / RHS::Type::Cols;
91 constexpr
size_t RHS_LR_Ratio = RHS::RThread / RHS::Type::Rows;
// Evaluate the RHS sub-expression; the returned object supplies the source
// pointer (rhs_acc) used by the copy below.
92 auto rhs_acc2 =
EvalExpr<RHS, Loc, Params...>::template eval_neighbour<
93 false, 0, 0, 0, 0, Offset, OutputIndex, LC, LR>(cOffset, t);
// isLocal is passed to get_compare below; its initialiser is not part of this
// excerpt.
94 constexpr
bool isLocal =
// Barrier issued after the RHS evaluation and before any element is copied.
// NOTE(review): the lines just before this call are missing, so it may sit
// inside a conditional - confirm.
97 cOffset.global_barrier();
// NOTE(review): rhs_acc1 and lhs_acc1 are not referenced in the visible lines;
// they may be unused.
98 auto rhs_acc1 = RHS_Eval_Expr::get_accessor(t);
99 auto lhs_acc1 = LHS_Eval_Expr::get_accessor(t);
// Raw pointers used for the copy: source from rhs_acc2, destination from the
// LHS accessor.
100 auto rhs_acc = rhs_acc2.get_pointer();
101 auto lhs_acc = LHS_Eval_Expr::get_accessor(t).get_pointer();
// The loops below use LC_Ratio / LR_Ratio for both the read and the write
// side, so mismatching RHS ratios are rejected at compile time.
102 static_assert(RHS_LR_Ratio == LR_Ratio && RHS_LC_Ratio == LC_Ratio,
103 "You made a programing mistake. The kernel must break when "
104 "the two are not equal");
// Only work-items whose local ids fall inside the ratio-reduced local range
// take part in the copy.
105 if ((cOffset.l_c < (cOffset.cLRng / LC_Ratio)) &&
106 (cOffset.l_r < (cOffset.rLRng / LR_Ratio))) {
// Rescale the global coordinate: the part of g_c / g_r that excludes the
// local id is divided by the ratio, then the local id is added back.
107 size_t g_c = ((cOffset.g_c - cOffset.l_c) / LC_Ratio) + cOffset.l_c;
108 size_t g_r = ((cOffset.g_r - cOffset.l_r) / LR_Ratio) + cOffset.l_r;
// Column loop over the ratio-reduced tile width.
// NOTE(review): i is int here while the row index j below is size_t.
110 for (
int i = 0; i < LC / LC_Ratio; i += (cOffset.cLRng / LC_Ratio)) {
// Column guard: get_compare on the input side plus an LHS width check.
// NOTE(review): this uses LC / RHS_LC_Ratio while the row check uses
// LR / LR_Ratio; the static_assert makes the two ratios equal.
111 if (get_compare<isLocal, LC / RHS_LC_Ratio, RHS::Type::Cols>(
112 cOffset.l_c, i, g_c) &&
113 (g_c + i + OffsetColOut < LHS::Type::Cols)) {
// Row loop over the ratio-reduced tile height.
114 for (
size_t j = 0; j < LR / LR_Ratio;
115 j += (cOffset.rLRng / LR_Ratio)) {
// Row guard: get_compare on the input side plus an LHS height check.
116 if (get_compare<isLocal, LR / LR_Ratio, RHS::Type::Rows>(
117 cOffset.l_r, j, g_r) &&
118 (g_r + j + OffsetRowOut < LHS::Type::Rows)) {
// Fragments of the copy statement (several lines are missing): the destination
// index uses the output row and LHS::Type::Cols, and the source index uses the
// reduced tile dimensions.
120 g_r + j + OffsetRowOut, LHS::Type::Cols,
123 LC / LC_Ratio, LR / LR_Ratio)];
// Partial specialisation whose eval() first evaluates the RHS through
// eval_global_neighbour and then copies RHS elements into LHS through raw
// pointers.
// NOTE(review): the struct header line is missing from this excerpt; this is
// presumably the GlobalNeighbourOP Evaluator specialisation for ParallelCopy -
// confirm against the full header.
//
// Template parameters visible here:
//   Offset, OutputIndex, LC, LR - forwarded to eval_global_neighbour; LC and
//                                 LR also bound the copy loops below.
//   OffsetColIn, OffsetRowIn    - added to the coordinate used to read RHS.
//   OffsetColOut, OffsetRowOut  - added to the coordinate used to write LHS.
136 template <
size_t OutputIndex,
size_t Offset,
size_t LC,
size_t LR,
typename LHS,
137 typename RHS,
size_t Cols,
size_t Rows,
size_t OffsetColIn,
138 size_t OffsetRowIn,
size_t OffsetColOut,
size_t OffsetRowOut,
139 size_t LfType,
size_t LVL,
typename Loc,
typename... Params>
141 ParallelCopy<LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn,
142 OffsetColOut, OffsetRowOut, LfType, LVL>,
145 OffsetColOut, OffsetRowOut, LfType, LVL>;
// eval: cOffset is taken by non-const reference because the loop body
// overwrites cOffset.pointOp_gc / cOffset.pointOp_gr.
146 static inline void eval(Loc &cOffset,
// Shorthand for the evaluators of the right-hand and left-hand sub-expressions.
150 using RHS_Eval_Expr =
EvalExpr<RHS, Loc, Params...>;
151 using LHS_Eval_Expr =
EvalExpr<LHS, Loc, Params...>;
// Evaluate the RHS sub-expression. The return value is discarded here, so the
// call is made for its effects only; the data is read back through the RHS
// accessor below.
152 EvalExpr<RHS, Loc, Params...>::template eval_global_neighbour<
153 true, Offset, OutputIndex, LC, LR>(cOffset, t);
// Raw pointers to the source (RHS) and destination (LHS) storage.
154 auto rhs_acc = RHS_Eval_Expr::get_accessor(t).get_pointer();
155 auto lhs_acc = LHS_Eval_Expr::get_accessor(t).get_pointer();
// Column loop: starts at 0 and advances by cOffset.cLRng until LC is reached.
// NOTE(review): i is int while LC is size_t, so the comparison mixes signed
// and unsigned operands.
157 for (
int i = 0; i < LC; i += cOffset.cLRng) {
// Skip columns that fall outside either the RHS (read) or LHS (write) width.
158 if (((cOffset.g_c + OffsetColIn + i) < RHS::Type::Cols) &&
159 ((cOffset.g_c + OffsetColOut + i) < LHS::Type::Cols)) {
// Row loop: same pattern with cOffset.rLRng as the step and LR as the bound.
160 for (
int j = 0; j < LR; j += cOffset.rLRng) {
// Skip rows that fall outside either the RHS (read) or LHS (write) height.
161 if (((cOffset.g_r + OffsetRowIn + j) < RHS::Type::Rows) &&
162 ((cOffset.g_r + OffsetRowOut + j) < LHS::Type::Rows)) {
// Store the input coordinate in cOffset.
// NOTE(review): nothing in the visible lines reads these two fields afterwards.
163 cOffset.pointOp_gc = cOffset.g_c + i + OffsetColIn;
164 cOffset.pointOp_gr = cOffset.g_r + j + OffsetRowIn;
// Element copy with linear indexing of the form column + Cols * row on both
// sides: the destination uses the output offsets and LHS::Type::Cols, the
// source uses the input offsets and RHS::Type::Cols.
165 lhs_acc[(cOffset.g_c + i + OffsetColOut) +
166 (LHS::Type::Cols * (cOffset.g_r + j + OffsetRowOut))] =
167 rhs_acc[(cOffset.g_c + i + OffsetColIn) +
168 (RHS::Type::Cols * (cOffset.g_r + j + OffsetRowIn))];
constexpr static size_t NeighbourOP
constexpr static size_t GlobalNeighbourOP
constexpr static size_t PointOP
static size_t calculate_index(size_t c, size_t r, size_t cols, size_t rows)
function calculate_index
static constexpr ScopeType Local
eval_global_neighbour function:
visioncpp::internal::Evaluator< ops_category::GlobalNeighbourOP, OutputIndex, Offset, LC, LR, ParallelCopy< LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut, OffsetRowOut, LfType, LVL >, Loc, Params... >::eval static void eval(Loc &cOffset, const tools::tuple::Tuple< Params... > &t)
visioncpp::internal::Evaluator< ops_category::NeighbourOP, OutputIndex, Offset, LC, LR, ParallelCopy< LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut, OffsetRowOut, LfType, LVL >, Loc, Params... >::eval static void eval(Loc &cOffset, const tools::tuple::Tuple< Params... > &t)
visioncpp::internal::Evaluator< ops_category::PointOP, OutputIndex, Offset, LC, LR, ParallelCopy< LHS, RHS, Cols, Rows, OffsetColIn, OffsetRowIn, OffsetColOut, OffsetRowOut, LfType, LVL >, Loc, Params... >::eval static void eval(Loc &cOffset, const tools::tuple::Tuple< Params... > &t)
the root of the expression tree.
This class is used to determine the ElementType of accessor template parameters.
The definition is in ParallelCopy file.
This struct is a trait that exposes the value type inside the accessor.