Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_PCI.H
Go to the documentation of this file.
1#ifndef AMREX_PCI_H_
2#define AMREX_PCI_H_
3
4namespace amrex {
5
6template <class FAB>
7void
8FabArray<FAB>::PC_local_cpu (const CPC& thecpc, FabArray<FAB> const& src,
9 int scomp, int dcomp, int ncomp, CpOp op)
10{
11 auto const N_locs = static_cast<int>(thecpc.m_LocTags->size());
12 if (N_locs == 0) { return; }
13 bool is_thread_safe = thecpc.m_threadsafe_loc;
14
15 if (is_thread_safe)
16 {
17#ifdef AMREX_USE_OMP
18#pragma omp parallel for
19#endif
20 for (int i = 0; i < N_locs; ++i)
21 {
22 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
23 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
24 // avoid self copy or plus
25 const FAB* sfab = &(src[tag.srcIndex]);
26 FAB* dfab = &(get(tag.dstIndex));
27 if (op == FabArrayBase::COPY)
28 {
29 dfab->template copy<RunOn::Host>(*sfab, tag.sbox, scomp, tag.dbox, dcomp, ncomp);
30 }
31 else
32 {
33 dfab->template plus<RunOn::Host>(*sfab, tag.sbox, tag.dbox, scomp, dcomp, ncomp);
34 }
35 }
36 }
37 }
38 else
39 {
41 for (int i = 0; i < N_locs; ++i)
42 {
43 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
44 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
45 loc_copy_tags[tag.dstIndex].push_back
46 ({src.fabPtr(tag.srcIndex), tag.dbox, tag.sbox.smallEnd()-tag.dbox.smallEnd()});
47 }
48 }
49
50#ifdef AMREX_USE_OMP
51#pragma omp parallel
52#endif
53 for (MFIter mfi(*this); mfi.isValid(); ++mfi)
54 {
55 const auto& tags = loc_copy_tags[mfi];
56 auto dfab = this->array(mfi);
57 if (op == FabArrayBase::COPY)
58 {
59 for (auto const & tag : tags)
60 {
61 auto const sfab = tag.sfab->array();
62 Dim3 offset = tag.offset.dim3();
63 amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
64 [=] (int i, int j, int k, int n) noexcept
65 {
66 dfab(i,j,k,dcomp+n) = sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
67 });
68 }
69 }
70 else
71 {
72 for (auto const & tag : tags)
73 {
74 auto const sfab = tag.sfab->array();
75 Dim3 offset = tag.offset.dim3();
76 amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
77 [=] (int i, int j, int k, int n) noexcept
78 {
79 dfab(i,j,k,dcomp+n) += sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
80 });
81 }
82 }
83 }
84 }
85}
86
87#ifdef AMREX_USE_GPU
88template <class FAB>
89void
91 int scomp, int dcomp, int ncomp, CpOp op,
92 bool deterministic)
93{
94 int N_locs = thecpc.m_LocTags->size();
95 if (N_locs == 0) { return; }
96 bool is_thread_safe = thecpc.m_threadsafe_loc;
97
98 using TagType = Array4CopyTag<value_type>;
99 Vector<TagType> loc_copy_tags;
100 loc_copy_tags.reserve(N_locs);
101
102 Vector<BaseFab<int> > maskfabs;
103 Vector<Array4Tag<int> > masks_unique;
104 Vector<Array4Tag<int> > masks;
105 if (!is_thread_safe)
106 {
108 (op == FabArrayBase::ADD && !amrex::HasAtomicAdd <value_type>::value &&
109 !deterministic))
110 {
111 maskfabs.resize(this->local_size());
112 masks_unique.reserve(this->local_size());
113 masks.reserve(N_locs);
114 }
115 }
116
117 for (int i = 0; i < N_locs; ++i)
118 {
119 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
120 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
121 int li = this->localindex(tag.dstIndex);
122 loc_copy_tags.push_back
123 ({this->atLocalIdx(li).array(), tag.dstIndex,
124 src.fabPtr(tag.srcIndex)->const_array(),
125 tag.dbox,
126 (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
127
128 if (maskfabs.size() > 0) {
129 if (!maskfabs[li].isAllocated()) {
130 maskfabs[li].resize(this->atLocalIdx(li).box());
131 masks_unique.emplace_back(Array4Tag<int>{.dfab = maskfabs[li].array()});
132 }
133 masks.emplace_back(Array4Tag<int>{.dfab = maskfabs[li].array()});
134 }
135 }
136 }
137
138 if (maskfabs.size() > 0) {
139 amrex::ParallelFor(masks_unique,
140 [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4Tag<int> const& msk) noexcept
141 {
142 msk.dfab(i,j,k) = 0;
143 });
144 }
145
146 if (op == FabArrayBase::COPY)
147 {
148 if (is_thread_safe) {
149 detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
150 dcomp, ncomp, detail::CellStore<value_type, value_type>());
151 } else {
152 detail::fab_to_fab_atomic_cpy<value_type, value_type>(
153 loc_copy_tags, scomp, dcomp, ncomp, masks);
154 }
155 }
156 else
157 {
158 if (deterministic) {
159 // Use deterministic_fab_to_fab for ADD operations to ensure bitwise
160 // reproducibility. The atomic add operations (fab_to_fab_atomic_add) are
161 // non-deterministic because the order of atomic additions is not guaranteed.
162 detail::deterministic_fab_to_fab<value_type, value_type>(
163 loc_copy_tags, scomp, dcomp, ncomp, detail::CellAdd<value_type, value_type>());
164 } else if (is_thread_safe) {
165 detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
166 dcomp, ncomp, detail::CellAdd<value_type, value_type>());
167 } else {
168 detail::fab_to_fab_atomic_add<value_type, value_type>(
169 loc_copy_tags, scomp, dcomp, ncomp, masks);
170 }
171 }
172}
173#endif
174
175}
176
177#endif
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:111
CpOp
parallel copy or add
Definition AMReX_FabArrayBase.H:394
@ ADD
Definition AMReX_FabArrayBase.H:394
@ COPY
Definition AMReX_FabArrayBase.H:394
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:351
void PC_local_cpu(const CPC &thecpc, FabArray< FAB > const &src, int scomp, int dcomp, int ncomp, CpOp op)
Definition AMReX_PCI.H:8
void PC_local_gpu(const CPC &thecpc, FabArray< FAB > const &src, int scomp, int dcomp, int ncomp, CpOp op, bool deterministic)
Definition AMReX_PCI.H:90
FAB * fabPtr(const MFIter &mfi) noexcept
Return pointer to FAB.
Definition AMReX_FabArray.H:1782
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
Definition AMReX_Amr.cpp:50
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2867
void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:388
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2862
__host__ __device__ constexpr int get(IntVectND< dim > const &iv) noexcept
Get I'th element of IntVectND<dim>
Definition AMReX_IntVect.H:1334
Definition AMReX_TagParallelFor.H:26
Definition AMReX_TagParallelFor.H:50
Array4< T > dfab
Definition AMReX_TagParallelFor.H:51
Definition AMReX_Dim3.H:12
parallel copy or add
Definition AMReX_FabArrayBase.H:538
bool m_threadsafe_loc
Definition AMReX_FabArrayBase.H:474
std::unique_ptr< CopyComTagsContainer > m_LocTags
Definition AMReX_FabArrayBase.H:476
Used by a bunch of routines when communicating via MPI.
Definition AMReX_FabArrayBase.H:195
Box sbox
Definition AMReX_FabArrayBase.H:197
int srcIndex
Definition AMReX_FabArrayBase.H:199
Box dbox
Definition AMReX_FabArrayBase.H:196
int dstIndex
Definition AMReX_FabArrayBase.H:198
Definition AMReX_TypeTraits.H:282