AMReX: Block-Structured AMR Software Framework
Documentation listing for AMReX_NeighborParticlesGPUImpl.H — GPU implementation
of the neighbor-particle exchange (neighbor mask construction, copy-op
construction, fill/update/clear of neighbor particles).
1#ifndef AMREX_NEIGHBORPARTICLESGPUIMPL_H_
2#define AMREX_NEIGHBORPARTICLESGPUIMPL_H_
3#include <AMReX_Config.H>
4
5namespace amrex {
6
8namespace detail
9{
10 template <typename F>
12 void forEachIntersectingTile (IntVect const& iv, int nGrow,
13 Box const& grid_box, IntVect const& periodic_shift,
14 bool do_tiling, IntVect const& tile_size, F&& f)
15 {
16 Box cell_box(iv, iv);
17 cell_box.grow(nGrow);
18 cell_box += periodic_shift;
19 cell_box &= grid_box;
20
21 if (!cell_box.ok()) { return; }
22
23 auto const& cb_lo = cell_box.smallEnd();
24 auto const& cb_hi = cell_box.bigEnd();
25
26#if (AMREX_SPACEDIM == 1)
27 for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
28 IntVect cell(AMREX_D_DECL(i, 0, 0));
29 Box tbx;
30 int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
31 IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)), 0, 0));
32 if (cell == rep) {
33 f(tile);
34 }
35 }
36#elif (AMREX_SPACEDIM == 2)
37 for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) {
38 for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
39 IntVect cell(AMREX_D_DECL(i, j, 0));
40 Box tbx;
41 int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
42 IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)),
43 amrex::max(cb_lo[1], tbx.smallEnd(1)),
44 0));
45 if (cell == rep) {
46 f(tile);
47 }
48 }
49 }
50#else
51 for (int k = cb_lo[2]; k <= cb_hi[2]; ++k) {
52 for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) {
53 for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
54 IntVect cell(AMREX_D_DECL(i, j, k));
55 Box tbx;
56 int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
57 IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)),
58 amrex::max(cb_lo[1], tbx.smallEnd(1)),
59 amrex::max(cb_lo[2], tbx.smallEnd(2))));
60 if (cell == rep) {
61 f(tile);
62 }
63 }
64 }
65 }
66#endif
67 }
68
69 inline Vector<Box> getBoundaryBoxes(const Box& box, int ncells)
70 {
71 AMREX_ASSERT_WITH_MESSAGE(box.size() > 2*IntVect(AMREX_D_DECL(ncells, ncells, ncells)),
72 "Too many cells requested in getBoundaryBoxes");
73
74 AMREX_ASSERT_WITH_MESSAGE(box.ixType().cellCentered(),
75 "Box must be cell-centered");
76
77 Vector<Box> bl;
78 for (int i = 0; i < AMREX_SPACEDIM; ++i) {
79 BoxList face_boxes;
80 Box hi_face_box = adjCellHi(box, i, ncells);
81 Box lo_face_box = adjCellLo(box, i, ncells);
82 face_boxes.push_back(hi_face_box); bl.push_back(hi_face_box);
83 face_boxes.push_back(lo_face_box); bl.push_back(lo_face_box);
84 for (auto face_box : face_boxes) {
85 for (int j = 0; j < AMREX_SPACEDIM; ++j) {
86 if (i == j) { continue; }
87 BoxList edge_boxes;
88 Box hi_edge_box = adjCellHi(face_box, j, ncells);
89 Box lo_edge_box = adjCellLo(face_box, j, ncells);
90 edge_boxes.push_back(hi_edge_box); bl.push_back(hi_edge_box);
91 edge_boxes.push_back(lo_edge_box); bl.push_back(lo_edge_box);
92 for (auto edge_box : edge_boxes) {
93 for (int k = 0; k < AMREX_SPACEDIM; ++k) {
94 if ((j == k) || (i == k)) { continue; }
95 Box hi_corner_box = adjCellHi(edge_box, k, ncells);
96 Box lo_corner_box = adjCellLo(edge_box, k, ncells);
97 bl.push_back(hi_corner_box);
98 bl.push_back(lo_corner_box);
99 }
100 }
101 }
102 }
103 }
104
106 return bl;
107 }
108}
110
111template <typename T_ParticleType, int NArrayReal, int NArrayInt,
112 template<class> class Allocator, class CellAssignor>
113void
116{
117 BL_PROFILE("NeighborParticleContainer_impl::buildNeighborMask");
118 m_neighbor_mask_initialized = true;
119 const int lev = 0;
120 const Geometry& geom = this->Geom(lev);
121 const BoxArray& ba = this->ParticleBoxArray(lev);
122 const DistributionMapping& dmap = this->ParticleDistributionMap(lev);
123
124 if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
125
126 if (!m_neighbor_mask_initialized ||
127 !BoxArray::SameRefs(m_cached_neighbor_ba, ba) ||
128 !DistributionMapping::SameRefs(m_cached_neighbor_dm, dmap))
129 {
130 const Periodicity& periodicity = geom.periodicity();
131 const std::vector<IntVect>& pshifts = periodicity.shiftIntVect();
132
133 for (MFIter mfi(ba, dmap); mfi.isValid(); ++mfi)
134 {
135 int grid = mfi.index();
136
137 std::set<NeighborTask> neighbor_grids;
138 for (auto pshift : pshifts)
139 {
140 const Box box = ba[mfi] + pshift;
141
142 const bool first_only = false;
143 auto isecs = ba.intersections(box, first_only, m_num_neighbor_cells);
144
145 for (auto& isec : isecs)
146 {
147 int nbor_grid = isec.first;
148 const Box isec_box = isec.second - pshift;
149 neighbor_grids.insert(NeighborTask(nbor_grid, isec_box, pshift));
150 const int global_rank = dmap[nbor_grid];
151 neighbor_procs.push_back(ParallelContext::global_to_local_rank(global_rank));
152 }
153 }
154
155 Gpu::HostVector<Box> h_isec_boxes;
157 for (auto nbor_grid : neighbor_grids)
158 {
159 NeighborCode code;
160 code.grid_id = nbor_grid.grid_id;
161 code.grid_box = ba[nbor_grid.grid_id];
162 code.periodic_shift = nbor_grid.periodic_shift;
163 h_code_arr.push_back(code);
164 h_isec_boxes.push_back(nbor_grid.box);
165 }
166
167 m_code_array[grid].resize(h_code_arr.size());
168 Gpu::copyAsync(Gpu::hostToDevice, h_code_arr.begin(), h_code_arr.end(),
169 m_code_array[grid].begin());
170 m_isec_boxes[grid].resize(h_isec_boxes.size());
171 Gpu::copyAsync(Gpu::hostToDevice, h_isec_boxes.begin(), h_isec_boxes.end(),
172 m_isec_boxes[grid].begin());
173
175 }
176
177 m_cached_neighbor_ba = ba;
178 m_cached_neighbor_dm = dmap;
179 m_neighbor_mask_initialized = true;
180 RemoveDuplicates(neighbor_procs);
181 }
182}
183
184template <typename T_ParticleType, int NArrayReal, int NArrayInt,
185 template<class> class Allocator, class CellAssignor>
186void
188buildNeighborCopyOp (bool use_boundary_neighbor)
189{
190 BL_PROFILE("NeighborParticleContainer_impl::buildNeighborCopyOp()");
191
192 AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor);
193
194 const int lev = 0;
195 const auto& geom = this->Geom(lev);
196 const auto dxi = this->Geom(lev).InvCellSizeArray();
197 const auto plo = this->Geom(lev).ProbLoArray();
198 const auto domain = this->Geom(lev).Domain();
199 auto& plev = this->GetParticles(lev);
200 auto& ba = this->ParticleBoxArray(lev);
201
202 if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
203
204 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
205 {
206 int gid = mfi.index();
207 int tid = mfi.LocalTileIndex();
208 auto index = std::make_pair(gid, tid);
209
210 auto& src_tile = plev[index];
211 auto const src_ptile_data = src_tile.getConstParticleTileData();
212 const size_t np_real = src_tile.numParticles();
213
214 size_t np = np_real;
215 if (use_boundary_neighbor) {
216 np = m_boundary_particle_ids[lev][index].size();
217 }
218 else {
219 m_boundary_particle_ids.resize(1);
220 m_boundary_particle_ids[lev][index];
221 }
222
223 const auto* p_bndry_pid = m_boundary_particle_ids[lev][index].dataPtr();
224
225 Gpu::DeviceVector<int> counts(np + 1, 0);
226 Gpu::DeviceVector<int> offsets(np + 1);
227 auto p_counts = counts.dataPtr();
228 auto p_offsets = offsets.dataPtr();
229
230 auto p_code_array = m_code_array[gid].dataPtr();
231 auto p_isec_boxes = m_isec_boxes[gid].dataPtr();
232 const int nisec_box = m_isec_boxes[gid].size();
233 const bool do_tiling = this->do_tiling;
234 const IntVect tile_size = this->tile_size;
235 const int nGrow = m_num_neighbor_cells;
236
237 AMREX_FOR_1D ( np, i,
238 {
239 // note that cannot use ternary statement here because p_bndry is not
240 // properly allocated when use_boundary_neighbor=false
241 int pid = i;
242 if (use_boundary_neighbor) {
243 pid = p_bndry_pid[i];
244 }
245 IntVect iv = getParticleCell(src_ptile_data, pid, plo, dxi, domain);
246 for (int j=0; j<nisec_box; ++j) {
247 if (p_isec_boxes[j].contains(iv)) {
248 detail::forEachIntersectingTile(iv, nGrow,
249 p_code_array[j].grid_box,
250 p_code_array[j].periodic_shift,
251 do_tiling, tile_size,
252 [&] (int dst_tile)
253 {
254 bool is_self = (p_code_array[j].grid_id == gid) && (dst_tile == tid)
255 AMREX_D_TERM( && (p_code_array[j].periodic_shift[0] == 0),
256 && (p_code_array[j].periodic_shift[1] == 0),
257 && (p_code_array[j].periodic_shift[2] == 0));
258 if (!is_self) {
259 ++p_counts[i];
260 }
261 });
262 }
263 }
264 });
265
266 amrex::Gpu::exclusive_scan(counts.begin(), counts.end(), offsets.begin());
267
268 int num_copies;
269 Gpu::dtoh_memcpy_async(&num_copies, offsets.data()+np, sizeof(int));
271
272 neighbor_copy_op.resize(gid, tid, lev, num_copies);
273
274 auto tile_index = std::make_pair(gid, tid);
275 auto p_boxes = neighbor_copy_op.m_boxes[lev][tile_index].dataPtr();
276 auto p_levs = neighbor_copy_op.m_levels[lev][tile_index].dataPtr();
277 auto p_tiles = neighbor_copy_op.m_tiles[lev][tile_index].dataPtr();
278 auto p_src_indices = neighbor_copy_op.m_src_indices[lev][tile_index].dataPtr();
279 auto p_periodic_shift = neighbor_copy_op.m_periodic_shift[lev][tile_index].dataPtr();
280
282 AMREX_FOR_1D ( np, i,
283 {
284 int pid = i;
285 if (use_boundary_neighbor) {
286 pid = p_bndry_pid[i];
287 }
288 IntVect iv = getParticleCell(src_ptile_data, pid, plo, dxi, domain);
289 int k = p_offsets[i];
290 for (int j=0; j<nisec_box; ++j) {
291 if (p_isec_boxes[j].contains(iv)) {
292 detail::forEachIntersectingTile(iv, nGrow,
293 p_code_array[j].grid_box,
294 p_code_array[j].periodic_shift,
295 do_tiling, tile_size,
296 [&] (int dst_tile)
297 {
298 bool is_self = (p_code_array[j].grid_id == gid) && (dst_tile == tid)
299 AMREX_D_TERM( && (p_code_array[j].periodic_shift[0] == 0),
300 && (p_code_array[j].periodic_shift[1] == 0),
301 && (p_code_array[j].periodic_shift[2] == 0));
302 if (!is_self) {
303 p_boxes[k] = p_code_array[j].grid_id;
304 p_tiles[k] = dst_tile;
305 p_levs[k] = 0;
306 p_periodic_shift[k] = p_code_array[j].periodic_shift;
307 p_src_indices[k] = pid;
308 ++k;
309 }
310 });
311 }
312 }
313 AMREX_ALWAYS_ASSERT(k == p_offsets[i+1]);
314 });
315
317 }
318}
319
320template <typename T_ParticleType, int NArrayReal, int NArrayInt,
321 template<class> class Allocator, class CellAssignor>
322void
325{
326 BL_PROFILE("NeighborParticleContainer::fillNeighbors");
327
328 AMREX_ASSERT(numParticlesOutOfRange(*this, 0) == 0);
329
330 buildNeighborMask();
331 this->defineBufferMap();
332
333 neighbor_copy_op.clear();
334 neighbor_copy_plan.clear();
335 buildNeighborCopyOp();
336 neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
337 ghost_real_comp, true);
338 updateNeighborsGPU(false);
339}
340
341template <typename T_ParticleType, int NArrayReal, int NArrayInt,
342 template<class> class Allocator, class CellAssignor>
343void
345updateNeighborsGPU (bool boundary_neighbors_only)
346{
347 BL_PROFILE("NeighborParticleContainer::updateNeighborsGPU");
348
349 if (boundary_neighbors_only) {
350 neighbor_copy_op.clear();
351 neighbor_copy_plan.clear();
352 buildNeighborCopyOp(true);
353 neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
354 ghost_real_comp, true);
355 }
356
357 clearNeighbors();
358
359 if (this->use_comms_arena) {
360 snd_buffer.setArena(The_Comms_Arena());
361 rcv_buffer.setArena(The_Comms_Arena());
362 }
363
364 packBuffer(*this, neighbor_copy_op, neighbor_copy_plan, snd_buffer);
366 {
367 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
368 communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, rcv_buffer);
369 unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
370 communicateParticlesFinish(neighbor_copy_plan);
371 unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
372 }
373 else
374 {
376 if (snd_buffer.arena()->isPinned()) {
377 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
379 communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, pinned_rcv_buffer);
380 } else {
381 pinned_snd_buffer.resize(snd_buffer.size());
382 Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(), snd_buffer.dataPtr(), snd_buffer.size());
383 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
385 communicateParticlesStart(*this, neighbor_copy_plan, pinned_snd_buffer, pinned_rcv_buffer);
386 }
387
388 rcv_buffer.resize(pinned_rcv_buffer.size());
389 unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
390 communicateParticlesFinish(neighbor_copy_plan);
391 Gpu::htod_memcpy_async(rcv_buffer.dataPtr(), pinned_rcv_buffer.dataPtr(), pinned_rcv_buffer.size());
392 unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
393 }
394
396}
397
398template <typename T_ParticleType, int NArrayReal, int NArrayInt,
399 template<class> class Allocator, class CellAssignor>
400void
403{
404 BL_PROFILE("NeighborParticleContainer::clearNeighborsGPU");
405
406 this->reserveData();
407 this->resizeData();
408 for (int lev = 0; lev < this->numLevels(); ++lev)
409 {
410 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
411 {
412 auto& ptile = this->DefineAndReturnParticleTile(lev, mfi);
413 ptile.setNumNeighbors(0);
414 }
415 }
416}
417
418}
419
420#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_FOR_1D(...)
Definition AMReX_GpuLaunchMacrosC.nolint.H:97
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:564
std::vector< std::pair< int, Box > > intersections(const Box &bx) const
Return intersections of Box and BoxArray.
Definition AMReX_BoxArray.cpp:1186
static bool SameRefs(const BoxArray &lhs, const BoxArray &rhs)
whether two BoxArrays share the same data
Definition AMReX_BoxArray.H:837
Long size() const noexcept
Return the number of boxes in the BoxArray.
Definition AMReX_BoxArray.H:611
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
static bool SameRefs(const DistributionMapping &lhs, const DistributionMapping &rhs)
Definition AMReX_DistributionMapping.H:189
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:75
bool isAnyPeriodic() const noexcept
Is domain periodic in any direction?
Definition AMReX_Geometry.H:339
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:361
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172
void updateNeighborsGPU(bool boundary_neighbors_only=false)
Definition AMReX_NeighborParticlesGPUImpl.H:345
void buildNeighborCopyOp(bool use_boundary_neighbor=false)
Definition AMReX_NeighborParticlesGPUImpl.H:188
void buildNeighborMask()
Definition AMReX_NeighborParticlesGPUImpl.H:115
void fillNeighborsGPU()
Definition AMReX_NeighborParticlesGPUImpl.H:324
void clearNeighborsGPU()
Definition AMReX_NeighborParticlesGPUImpl.H:402
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
iterator begin() noexcept
Definition AMReX_PODVector.H:674
iterator end() noexcept
Definition AMReX_PODVector.H:678
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
T * data() noexcept
Definition AMReX_PODVector.H:666
void push_back(const T &a_value)
Definition AMReX_PODVector.H:629
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
std::vector< IntVect > shiftIntVect(IntVect const &nghost=IntVect(0)) const
Definition AMReX_Periodicity.cpp:8
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1193
Arena * The_Comms_Arena()
Definition AMReX_Arena.cpp:880
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:228
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:435
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:113
Definition AMReX_Amr.cpp:50
__host__ __device__ BoxND< dim > adjCellHi(const BoxND< dim > &b, int dir, int len=1) noexcept
Similar to adjCellLo but builds an adjacent BoxND on the high end.
Definition AMReX_Box.H:1735
__host__ __device__ int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
Definition AMReX_ParticleUtil.H:185
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:909
__host__ __device__ BoxND< dim > adjCellLo(const BoxND< dim > &b, int dir, int len=1) noexcept
Return the cell centered BoxND of length len adjacent to b on the low end along the coordinate direct...
Definition AMReX_Box.H:1714
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:1009
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:445
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition AMReX_ParticleUtil.H:35
void RemoveDuplicates(Vector< T > &vec)
Definition AMReX_Vector.H:211
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:782
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:582
__host__ __device__ IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, 3 > const &plo, amrex::GpuArray< amrex::Real, 3 > const &dxi) noexcept
Returns the cell index for a given particle using the provided lower bounds and cell sizes.
Definition AMReX_ParticleUtil.H:337
Definition AMReX_NeighborParticles.H:17
Box grid_box
Definition AMReX_NeighborParticles.H:19
IntVect periodic_shift
Definition AMReX_NeighborParticles.H:20
int grid_id
Definition AMReX_NeighborParticles.H:18
Definition AMReX_NeighborParticles.H:451
Definition AMReX_ParticleCommunication.H:27