24 #ifndef VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_EXPRESSION_EVAL_EXPR_REDUCTION_HPP_
25 #define VISIONCPP_INCLUDE_FRAMEWORK_EVALUATOR_EVAL_EXPRESSION_EVAL_EXPR_REDUCTION_HPP_
/// \brief Partial specialisation of EvalExpr selected when the expression
/// node is RDCN<C_OP, RHS, Cols, Rows, LfType, LVL>.
/// \tparam C_OP  type exposing a nested functor type C_OP::OP
/// \tparam RHS   nested expression type; EvalExpr<RHS, Loc, Params...> is
///               evaluated first by the member functions of this struct
/// \tparam Cols, Rows  compile-time sizes forwarded to get_compare / id_val
///               (presumably the output dimensions of the node -- confirm
///               against the RDCN definition)
/// \tparam LfType, LVL  remaining RDCN parameters; LfType is forwarded to the
///               conversion applied to the functor result
/// \tparam Loc   type of the coordinate object passed as cOffset
/// \tparam Params  element types of the tools::tuple::Tuple passed as t
31 template <
typename C_OP,
typename RHS,
size_t Cols,
size_t Rows,
size_t LfType,
32 size_t LVL,
typename Loc,
typename... Params>
33 struct EvalExpr<
RDCN<C_OP, RHS, Cols, Rows, LfType, LVL>, Loc, Params...> {
/// \brief eval_neighbour: evaluates this node when the operation category is
/// NeighbourOP. The nested RHS expression is evaluated first; the functor
/// C_OP::OP is then applied to the `neighbour` object and the result is stored
/// in the tuple element selected by OutOffset, which is also the return value.
/// The declaration listed for this function takes (Loc &cOffset,
/// const tools::tuple::Tuple<Params...> &t).
/// NOTE(review): this listing is incomplete -- the trailing template
/// parameter, the function signature and a few statements are not visible
/// here, so the comments below only describe the lines that are shown.
37 template <
bool IsRoot,
size_t Halo_Top,
size_t Halo_Left,
size_t Halo_Butt,
38 size_t Halo_Right,
size_t Offset,
size_t Index,
size_t LC,
// Position inside tuple t of the element that receives this node's output.
45 constexpr
size_t OutOffset =
OutputLocation<IsRoot, Offset + Index - 1>::ID;
// True when the scope reported for the output element's type is scope::Local.
46 constexpr
bool isLocal =
48 tools::tuple::get<OutOffset>(t))>::Type>::scope ==
scope::Local;
// Ratios between the RHS thread counts (CThread / RThread) and Cols / Rows.
49 constexpr
size_t LC_Ratio = RHS::CThread / Cols;
50 constexpr
size_t LR_Ratio = RHS::RThread / Rows;
// Evaluate the nested expression as a non-root node (IsRoot = false,
// Index - 1) and keep the pointer obtained from the value it returns.
52 auto nested_acc =
EvalExpr<RHS, Loc, Params...>::template eval_neighbour<
53 false, Halo_Top, Halo_Left, Halo_Butt, Halo_Right,
54 Offset, Index - 1, LC, LR>(cOffset, t).get_pointer();
// Only work-items whose local ids fall below the ratio-scaled local range
// execute the body below.
56 if ((cOffset.l_c < (cOffset.cLRng / LC_Ratio)) &&
57 (cOffset.l_r < (cOffset.rLRng / LR_Ratio))) {
58 static constexpr
size_t Neighbour_LC_Ratio =
59 LC_Ratio / (RHS::Type::Cols / Cols);
60 static constexpr
size_t Neighbour_LR_Ratio =
61 LR_Ratio / (RHS::Type::Rows / Rows);
// Trailing arguments of a construction whose opening line is not shown in
// this listing; presumably it builds the `neighbour` object used below.
63 nested_acc, LC / Neighbour_LC_Ratio, LR / Neighbour_LR_Ratio);
// Global ids rescaled by the ratios: the part of the global id that precedes
// the local id is divided by the ratio, then the local id is added back.
64 size_t g_c = ((cOffset.g_c - cOffset.l_c) / LC_Ratio) + cOffset.l_c;
65 size_t g_r = ((cOffset.g_r - cOffset.l_r) / LR_Ratio) + cOffset.l_r;
// Strided loops over the reduced extent (LC / LC_Ratio by LR / LR_Ratio).
// NOTE(review): `i` is int while `j` is size_t, and both are compared with
// size_t expressions -- the index types are inconsistent and the `i`
// comparison mixes signed and unsigned operands.
67 for (
int i = 0; i < LC / LC_Ratio; i += (cOffset.cLRng / LC_Ratio)) {
68 if (get_compare<isLocal, LC / LC_Ratio, Cols>(cOffset.l_c, i, g_c)) {
69 for (
size_t j = 0; j < LR / LR_Ratio;
70 j += (cOffset.rLRng / LR_Ratio)) {
71 if (get_compare<isLocal, LR / LR_Ratio, Rows>(cOffset.l_r, j,
// Point the neighbour object at the element handled in this iteration.
73 neighbour.set_offset((cOffset.l_c + i), (cOffset.l_r + j));
// Index arguments of the output element; id_val<isLocal> picks between its
// two arguments (presumably local-vs-global -- confirm in id_val).
75 id_val<isLocal>(cOffset.l_c, g_c) + i,
76 id_val<isLocal>(cOffset.l_r, g_r) + j,
77 id_val<isLocal>(LC / LC_Ratio, Cols),
78 id_val<isLocal>(LR / LR_Ratio, Rows))] =
// The stored value is the result of a default-constructed C_OP::OP applied
// to `neighbour`, passed through a conversion parameterised on LfType and
// the output element's type.
80 LfType, decltype(tools::tuple::get<OutOffset>(t))>::Type>(
81 typename C_OP::OP()(neighbour));
// Hand back the tuple element that holds this node's output.
90 return tools::tuple::get<OutOffset>(t);
/// \brief eval_global_neighbour: evaluates the nested RHS expression through
/// its own eval_global_neighbour, applies the functor C_OP::OP to the
/// `reduction` object and stores the result in the tuple element selected by
/// OutOffset, which is also the return value.
/// The declaration listed for this function takes (Loc &cOffset,
/// const tools::tuple::Tuple<Params...> &t).
/// NOTE(review): this listing is incomplete -- the function signature and a
/// few statements are not visible here, so the comments below only describe
/// the lines that are shown.
93 template <
bool IsRoot,
size_t Offset,
size_t Index,
size_t LC,
size_t LR>
// Position inside tuple t of the element that receives this node's output.
99 constexpr
size_t OutOffset =
OutputLocation<IsRoot, Offset + Index - 1>::ID;
// True when the scope reported for the output element's type is scope::Local.
100 constexpr
bool isLocal =
102 tools::tuple::get<OutOffset>(t))>::Type>::scope ==
scope::Local;
// Nested expression evaluated as a non-root node (IsRoot = false, Index - 1);
// get_pointer() is called on the value it returns.
105 EvalExpr<RHS, Loc, Params...>::template eval_global_neighbour<
106 false, Offset, Index - 1, LC, LR>(cOffset, t).get_pointer();
// Trailing arguments of a construction whose opening line is not shown in
// this listing; presumably it builds the `reduction` object used below from
// the nested pointer and the RHS dimensions.
109 nested_acc, RHS::Type::Cols, RHS::Type::Rows);
// Strided loops over LC x LR, stepping by the cLRng / rLRng members of
// cOffset.
// NOTE(review): `i` and `j` are int but are compared with the size_t template
// parameters LC and LR (signed/unsigned comparison).
110 for (
int i = 0; i < LC; i += cOffset.cLRng) {
111 if (get_compare<isLocal, LC, Cols>(cOffset.l_c, i, cOffset.g_c)) {
112 for (
int j = 0; j < LR; j += cOffset.rLRng) {
113 if (get_compare<isLocal, LR, Rows>(cOffset.l_r, j, cOffset.g_r)) {
// Point the reduction object at the global position handled in this
// iteration.
114 reduction.set_offset(cOffset.g_c + i, cOffset.g_r + j);
// Index arguments of the output element; id_val<isLocal> picks between its
// two arguments (presumably local-vs-global -- confirm in id_val).
116 id_val<isLocal>(cOffset.l_c, cOffset.g_c) + i,
117 id_val<isLocal>(cOffset.l_r, cOffset.g_r) + j,
118 id_val<isLocal>(LC, Cols), id_val<isLocal>(LR, Rows))] =
// The stored value is the result of a default-constructed C_OP::OP applied
// to `reduction`, passed through a conversion parameterised on LfType and
// the output element's type.
120 LfType, decltype(tools::tuple::get<OutOffset>(t))>::Type>(
121 typename C_OP::OP()(reduction))));
// Hand back the tuple element that holds this node's output.
130 return tools::tuple::get<OutOffset>(t);
static size_t calculate_index(size_t c, size_t r, size_t cols, size_t rows)
function calculate_index
static constexpr ScopeType Local
static auto eval_neighbour(Loc &cOffset, const tools::tuple::Tuple< Params... > &t) -> decltype(tools::tuple::get< OutputLocation< IsRoot, Offset+Index - 1 >::ID >(t))
Evaluation function used when the internal::ops_category is NeighbourOP.
static auto eval_global_neighbour(Loc &cOffset, const tools::tuple::Tuple< Params... > &t) -> decltype(tools::tuple::get< OutputLocation< IsRoot, Offset+Index - 1 >::ID >(t))
eval_global_neighbour function:
GlobalNeighbour is used to provide local access for each element of the global memory based on the Co...
LocalNeighbour is used to provide local access for each element of the local memory based on the Coor...
This class is used to determine the ElementType of accessor template parameters.
This is used to determine whether a node should use a global memory output or whether a local memory output is cre...
The definition is in RDCN file.
This struct is a trait used to obtain the value type inside the accessor.