1#ifndef AMREX_PARTICLECOMMUNICATION_H_
2#define AMREX_PARTICLECOMMUNICATION_H_
3#include <AMReX_Config.H>
24class ParticleContainerBase;
28 template <
class PTile>
29 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
31 for(
int i = 0; i < std::ssize(sizes); ++i)
33 int offset = tiles[i]->numTotalParticles();
34 int nn = tiles[i]->getNumNeighbors();
35 tiles[i]->setNumNeighbors(nn + sizes[i]);
43 template <
class PTile>
44 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
46 int N =
static_cast<int>(sizes.size());
48 std::map<PTile*, int> tile_sizes;
49 for(
int i = 0; i < N; ++i) {
50 tile_sizes[tiles[i]] = tiles[i]->numParticles();
53 for(
int i = 0; i < N; ++i)
55 offsets.push_back(tile_sizes[tiles[i]]);
56 tile_sizes[tiles[i]] += sizes[i];
59 for (
auto& kv : tile_sizes) {
60 kv.first->resize(kv.second);
79 void resize (
int gid,
int tid,
int lev,
int size);
83 if (
m_boxes.size() <= lev) {
return 0; }
84 auto mit =
m_boxes[lev].find(index);
85 return mit ==
m_boxes[lev].end() ? 0 :
int(mit->second.size());
99#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
102 const int* boxes =
nullptr;
103 const int* levs =
nullptr;
104 const int* tiles =
nullptr;
105 int* dst_indices =
nullptr;
114#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
115 , omp_copy_offsets(1, 0)
122#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
129 template <
class PC,
class F>
134 auto&& batch_fn = std::forward<F>(f);
136 for (
int lev = 0; lev < num_levels; ++lev)
138 for (
const auto& kv : pc.GetParticles(lev))
140 auto index = kv.first;
141 int num_copies = op.
numCopies(index, lev);
142 if (num_copies == 0) {
continue; }
145 dst_indices.resize(num_copies);
147 batch_fn(lev, index, num_copies, dst_indices);
152 template <
class PC,
class GetBucket>
156 BL_PROFILE(
"ParticleCopyPlan::buildCopiesStableOrdered");
182 for (
int i = 0; i < num_copies; ++i) {
183 int dst_box = h_boxes[i];
185 int dst_tile = h_tiles[i];
186 int dst_lev = h_levs[i];
187 int dst_index =
static_cast<int>(workspace.
h_box_counts[getBucket(dst_lev, dst_box, dst_tile)]++);
188 h_dst_indices[i] = dst_index;
194 h_dst_indices.
begin(), h_dst_indices.
end(),
195 dst_indices.
begin());
200#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
201 template <
class PC,
class GetBucket>
203 TwoPassHostAlgorithm, BuildWorkspace& workspace,
GetBucket const& getBucket)
205 BL_PROFILE(
"ParticleCopyPlan::buildCopiesTwoPassHost");
212 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
213 const auto* p_levs = op.
m_levels[lev].at(index).dataPtr();
214 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
215 auto* p_dst_indices = dst_indices.
dataPtr();
217 workspace.omp_copy_work.push_back({p_boxes, p_levs, p_tiles, p_dst_indices, num_copies});
218 workspace.omp_copy_offsets.push_back(workspace.omp_copy_offsets.back() + num_copies);
221 if (workspace.omp_copy_work.empty()) {
return; }
223 std::fill(workspace.omp_thread_box_counts.begin(), workspace.omp_thread_box_counts.end(), 0U);
224 auto* p_omp_thread_box_counts = workspace.omp_thread_box_counts.data();
225 const auto* p_omp_copy_work = workspace.omp_copy_work.data();
226 const auto* p_omp_copy_offsets = workspace.omp_copy_offsets.data();
227 const Long total_num_copies = workspace.omp_copy_offsets.back();
233 Long ibegin = thread_num*total_num_copies/num_threads;
234 Long iend = (thread_num+1)*total_num_copies/num_threads;
235 auto* p_thread_box_counts = p_omp_thread_box_counts + thread_num*workspace.num_buckets;
239 int iwork =
static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
240 workspace.omp_copy_offsets.end(),
242 - workspace.omp_copy_offsets.begin()) - 1;
243 while (iwork <
static_cast<int>(workspace.omp_copy_work.size()) &&
244 p_omp_copy_offsets[iwork] < iend)
246 auto const& work = p_omp_copy_work[iwork];
247 Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
248 Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
249 int local_begin =
static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
250 int local_end =
static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
251 for (
int i = local_begin; i < local_end; ++i)
253 int dst_box = work.boxes[i];
256 int dst_tile = work.tiles[i];
257 int dst_lev = work.levs[i];
258 ++p_thread_box_counts[getBucket(dst_lev, dst_box, dst_tile)];
267 for (
int ibucket = 0; ibucket < workspace.num_buckets; ++ibucket)
269 unsigned int offset = workspace.h_box_counts[ibucket];
270 for (
int tid = 0; tid < num_threads; ++tid)
272 auto& count = p_omp_thread_box_counts[tid*workspace.num_buckets + ibucket];
273 unsigned int total = count;
277 workspace.h_box_counts[ibucket] =
offset;
282 int iwork =
static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
283 workspace.omp_copy_offsets.end(),
285 - workspace.omp_copy_offsets.begin()) - 1;
286 while (iwork <
static_cast<int>(workspace.omp_copy_work.size()) &&
287 p_omp_copy_offsets[iwork] < iend)
289 auto const& work = p_omp_copy_work[iwork];
290 Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
291 Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
292 int local_begin =
static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
293 int local_end =
static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
294 for (
int i = local_begin; i < local_end; ++i)
296 int dst_box = work.boxes[i];
299 int dst_tile = work.tiles[i];
300 int dst_lev = work.levs[i];
301 int bucket = getBucket(dst_lev, dst_box, dst_tile);
302 work.dst_indices[i] =
static_cast<int>(p_thread_box_counts[bucket]++);
312 template <
class PC,
class GetBucket>
316 BL_PROFILE(
"ParticleCopyPlan::buildCopiesAtomicScatter");
321 [&op, &getBucket, p_dst_box_counts] (
int lev,
TileKey const& index,
324 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
325 const auto* p_levs = op.
m_levels[lev].at(index).dataPtr();
326 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
327 auto* p_dst_indices = dst_indices.
dataPtr();
331 int dst_box = p_boxes[i];
334 int dst_tile = p_tiles[i];
335 int dst_lev = p_levs[i];
337 &p_dst_box_counts[getBucket(dst_lev, dst_box, dst_tile)], 1U));
338 p_dst_indices[i] = dst_index;
346 if (use_host_box_counters) {
407 template <class PC, std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
417 pp.
query(
"do_one_sided_comms", m_do_one_sided_comms);
418 const int num_buckets = pc.BufferMap().numBuckets();
435#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
436 constexpr bool use_host_bucket_counters =
true;
438 constexpr bool use_host_bucket_counters =
false;
441 bool use_host_box_counters = pc.stableRedistribute() || use_host_bucket_counters;
442 if (use_host_box_counters) {
446 if (pc.stableRedistribute())
452#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
481 int NStructReal = PC::ParticleContainerType::NStructReal;
482 int NStructInt = PC::ParticleContainerType::NStructInt;
484 int num_real_comm_comp = 0;
485 int comm_comps_start = 0;
486 if constexpr (!PC::ParticleType::is_soa_particle) {
487 comm_comps_start += AMREX_SPACEDIM + NStructReal;
489 for (
int i = comm_comps_start; i < std::ssize(real_comp_mask); ++i) {
490 if (real_comp_mask[i]) {++num_real_comm_comp;}
493 int num_int_comm_comp = 0;
494 for (
int i = 2 + NStructInt; i < std::ssize(int_comp_mask); ++i) {
495 if (int_comp_mask[i]) {++num_int_comm_comp;}
498 if constexpr (PC::ParticleType::is_soa_particle) {
504 + num_int_comm_comp *
sizeof(
int);
549 bool m_local =
false;
550 int m_do_one_sided_comms = 0;
569 Long operator() (
int dst_box,
int dst_tile,
int dst_lev, std::size_t psize,
int i)
const
571 int dst_pid =
m_get_pid(dst_lev, dst_box, dst_tile);
578template <
class PC,
class Buffer,
579 std::enable_if_t<IsParticleContainer<PC>::value &&
580 std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
581 Buffer>,
int> foo = 0>
590 int num_buckets = pc.BufferMap().numBuckets();
592 std::size_t total_buffer_size = 0;
598 total_buffer_size = np*psize;
605 if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
609 snd_buffer.resize(total_buffer_size);
614 const auto plo = pc.Geom(0).ProbLoArray();
615 const auto phi = pc.Geom(0).ProbHiArray();
616 const auto is_per = pc.Geom(0).isPeriodicArray();
617#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
620 typename PC::ParticleTileType
const* src_tile =
nullptr;
621 const int* boxes =
nullptr;
622 const int* levels =
nullptr;
623 const int* tiles =
nullptr;
624 const int* src_indices =
nullptr;
625 const IntVect* periodic_shift =
nullptr;
626 const int* dst_indices =
nullptr;
632 for (
int lev = 0; lev < num_levels; ++lev)
634 auto& plev = pc.GetParticles(lev);
635 for (
auto& kv : plev)
637 auto index = kv.first;
638 auto& src_tile = plev.at(index);
639 int num_copies = op.
numCopies(index, lev);
640 if (num_copies == 0) {
continue; }
642 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
643 const auto* p_levels = op.
m_levels[lev].at(index).dataPtr();
644 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
645 const auto* p_src_indices = op.
m_src_indices[lev].at(index).dataPtr();
646 const auto* p_periodic_shift = op.
m_periodic_shift[lev].at(index).dataPtr();
647 const auto* p_dst_indices = plan.
m_dst_indices[lev].at(index).dataPtr();
648#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
649 omp_pack_work.push_back({&src_tile, p_boxes, p_levels, p_tiles,
650 p_src_indices, p_periodic_shift, p_dst_indices, num_copies});
651 omp_pack_offsets.push_back(omp_pack_offsets.back() + num_copies);
653 const auto& ptd = src_tile.getConstParticleTileData();
654 auto* p_snd_buffer = snd_buffer.
dataPtr();
658 int dst_box = p_boxes[i];
661 int dst_tile = p_tiles[i];
662 int dst_lev = p_levels[i];
663 auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize, p_dst_indices[i]);
664 int src_index = p_src_indices[i];
665 ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);
667 const IntVect& pshift = p_periodic_shift[i];
668 bool do_periodic_shift =
670 || (is_per[1] && pshift[1] != 0),
671 || (is_per[2] && pshift[2] != 0) );
673 if (do_periodic_shift)
676 Long pos_offset = dst_offset;
678 if constexpr (PC::ParticleType::is_soa_particle) {
679 pos_offset +=
sizeof(uint64_t);
683 for (
int idim = 0; idim < AMREX_SPACEDIM; ++idim)
685 if (! is_per[idim]) {
continue; }
686 if (pshift[idim] > 0) {
687 pos[idim] += phi[idim] - plo[idim];
688 }
else if (pshift[idim] < 0) {
689 pos[idim] -= phi[idim] - plo[idim];
701#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
702 if (!omp_pack_work.empty())
704 auto* p_snd_buffer = snd_buffer.dataPtr();
706 const Long total_num_copies = omp_pack_offsets.back();
712 Long ibegin = thread_num*total_num_copies/num_threads;
713 Long iend = (thread_num+1)*total_num_copies/num_threads;
717 int iwork =
static_cast<int>(std::upper_bound(omp_pack_offsets.begin(),
718 omp_pack_offsets.end(),
720 - omp_pack_offsets.begin()) - 1;
721 while (iwork <
static_cast<int>(omp_pack_work.
size()) &&
722 omp_pack_offsets[iwork] < iend)
724 auto const& work = omp_pack_work[iwork];
725 auto const& ptd = work.src_tile->getConstParticleTileData();
726 Long global_begin = std::max(ibegin, omp_pack_offsets[iwork]);
727 Long global_end = std::min(iend, omp_pack_offsets[iwork+1]);
728 int local_begin =
static_cast<int>(global_begin - omp_pack_offsets[iwork]);
729 int local_end =
static_cast<int>(global_end - omp_pack_offsets[iwork]);
730 for (
int i = local_begin; i < local_end; ++i)
732 int dst_box = work.boxes[i];
735 int dst_tile = work.tiles[i];
736 int dst_lev = work.levels[i];
737 auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize,
738 work.dst_indices[i]);
739 int src_index = work.src_indices[i];
740 ptd.packParticleData(p_snd_buffer, src_index, dst_offset,
741 p_comm_real, p_comm_int);
743 const IntVect& pshift = work.periodic_shift[i];
744 bool do_periodic_shift =
746 || (is_per[1] && pshift[1] != 0),
747 || (is_per[2] && pshift[2] != 0) );
749 if (do_periodic_shift)
752 Long pos_offset = dst_offset;
753 if constexpr (PC::ParticleType::is_soa_particle) {
754 pos_offset +=
sizeof(uint64_t);
758 for (
int idim = 0; idim < AMREX_SPACEDIM; ++idim)
760 if (! is_per[idim]) {
continue; }
761 if (pshift[idim] > 0) {
762 pos[idim] += phi[idim] - plo[idim];
763 }
else if (pshift[idim] < 0) {
764 pos[idim] -= phi[idim] - plo[idim];
780template <
class PC,
class Buffer,
class UnpackPolicy,
781 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
786 using PTile =
typename PC::ParticleTileType;
788 int num_levels = pc.BufferMap().numLevels();
792 std::vector<int> sizes;
793 std::vector<PTile*> tiles;
794 for (
int lev = 0; lev < num_levels; ++lev)
796 for(
MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
798 int gid = mfi.index();
799 int tid = mfi.LocalTileIndex();
800 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
801 int num_copies = plan.
m_box_counts_h[pc.BufferMap().gridAndTileAndLevToBucket(gid, tid, lev)];
802 sizes.push_back(num_copies);
803 tiles.push_back(&tile);
808 std::vector<int> offsets;
809 policy.resizeTiles(tiles, sizes, offsets);
814#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
817 PTile* tile =
nullptr;
830 for (
int lev = 0; lev < num_levels; ++lev)
832 auto& plev = pc.GetParticles(lev);
833 for(
MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
835 int gid = mfi.index();
836 int tid = mfi.LocalTileIndex();
837 auto index = std::make_pair(gid, tid);
839 auto& tile = plev[index];
843 int offset = offsets[uindex];
844 int size = sizes[uindex];
847#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
848 omp_unpack_work.push_back({&tile, gid, tid, lev,
offset, size});
849 omp_unpack_offsets.push_back(omp_unpack_offsets.back() + size);
851 auto p_snd_buffer = snd_buffer.
dataPtr();
852 auto ptd = tile.getParticleTileData();
855 auto src_offset = get_offset(gid, tid, lev, psize, i);
856 int dst_index =
offset + i;
857 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
863#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
864 if (!omp_unpack_work.empty())
867 auto p_snd_buffer = snd_buffer.dataPtr();
868 const Long total_num_copies = omp_unpack_offsets.back();
874 Long ibegin = thread_num*total_num_copies/num_threads;
875 Long iend = (thread_num+1)*total_num_copies/num_threads;
879 int iwork =
static_cast<int>(std::upper_bound(omp_unpack_offsets.begin(),
880 omp_unpack_offsets.end(),
882 - omp_unpack_offsets.begin()) - 1;
883 while (iwork <
static_cast<int>(omp_unpack_work.
size()) &&
884 omp_unpack_offsets[iwork] < iend)
886 auto const& work = omp_unpack_work[iwork];
887 auto ptd = work.tile->getParticleTileData();
888 Long global_begin = std::max(ibegin, omp_unpack_offsets[iwork]);
889 Long global_end = std::min(iend, omp_unpack_offsets[iwork+1]);
890 int local_begin =
static_cast<int>(global_begin - omp_unpack_offsets[iwork]);
891 int local_end =
static_cast<int>(global_end - omp_unpack_offsets[iwork]);
892 for (
int i = local_begin; i < local_end; ++i)
894 auto src_offset = get_offset(work.gid, work.tid, work.lev, psize, i);
895 int dst_index = work.offset + i;
896 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index,
897 p_comm_real, p_comm_int);
907template <
class PC,
class SndBuffer,
class RcvBuffer,
908 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
911 BL_PROFILE(
"amrex::communicateParticlesStart");
918 if (NProcs == 1) {
return; }
926 Long TotRcvBytes = 0;
927 for (
int i = 0; i < NProcs; ++i) {
929 RcvProc.push_back(i);
931 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
933 rOffset.push_back(TotRcvBytes);
939 for (
int i = 0; i < plan.
m_nrcvs; ++i)
948 rcv_buffer.resize(TotRcvBytes);
964 for (
int i = 0; i < plan.
m_nrcvs; ++i) {
965 const auto Who = RcvProc[i];
966 const auto offset = rOffset[i];
968 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
982 for (
int i = 0; i < NProcs; ++i)
984 if (i == MyProc) {
continue; }
987 if (Cnt == 0) {
continue; }
1007template <
class PC,
class Buffer,
class UnpackPolicy,
1008 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
1015 if (NProcs == 1) {
return; }
1019 using PTile =
typename PC::ParticleTileType;
1025 auto* p_rcv_buffer = rcv_buffer.dataPtr();
1027 std::vector<int> sizes;
1028 std::vector<PTile*> tiles;
1035 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
1036 sizes.push_back(copy_size);
1037 tiles.push_back(&tile);
1041 policy.resizeTiles(tiles, sizes, offsets);
1051 procindex = (rproc == plan.
m_rcv_box_pids[i]) ? procindex : procindex+1;
1054 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
1055 auto ptd = tile.getParticleTileData();
1060 int dst_offset = offsets[uindex];
1061 int size = sizes[uindex];
1069 +
static_cast<Long>(p_pad_adjust[procindex]);
1070 int dst_index = dst_offset + ip;
1071 ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
1072 p_comm_real, p_comm_int);
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
Iterator for looping over tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
iterator begin() noexcept
Definition AMReX_PODVector.H:674
iterator end() noexcept
Definition AMReX_PODVector.H:678
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
Parse Parameters From Command Line and Input Files.
Definition AMReX_ParmParse.H:349
int query(std::string_view name, bool &ref, int ival=FIRST) const
Same as querykth() but searches for the last occurrence of name.
Definition AMReX_ParmParse.cpp:1947
Definition AMReX_ParticleBufferMap.H:59
Definition AMReX_ParticleContainerBase.H:43
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
T * dataPtr() noexcept
get access to the underlying data pointer
Definition AMReX_Vector.H:49
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
amrex_long Long
Definition AMReX_INT.H:30
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1193
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:326
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:128
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:228
static constexpr DeviceToHost deviceToHost
Definition AMReX_GpuContainers.H:106
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
__host__ __device__ void * memcpy(void *dest, const void *src, std::size_t count)
Definition AMReX_GpuUtility.H:226
__host__ __device__ AMREX_FORCE_INLINE T FetchAdd(T *const sum, T const value) noexcept
Definition AMReX_GpuAtomic.H:644
constexpr int get_thread_num()
Definition AMReX_OpenMP.H:37
constexpr int get_num_threads()
Definition AMReX_OpenMP.H:35
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
Definition AMReX_Amr.cpp:50
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:909
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:1009
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:445
const int[]
Definition AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that ...
Definition AMReX_Arena.H:33
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:782
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:582
Definition AMReX_ParticleBufferMap.H:38
Definition AMReX_ParticleBufferMap.H:14
Definition AMReX_ParticleCommunication.H:554
const unsigned int * m_box_offsets
Definition AMReX_ParticleCommunication.H:555
GetPID m_get_pid
Definition AMReX_ParticleCommunication.H:558
GetBucket m_get_bucket
Definition AMReX_ParticleCommunication.H:559
const std::size_t * m_pad_correction
Definition AMReX_ParticleCommunication.H:556
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.H:561
__device__ Long operator()(int dst_box, int dst_tile, int dst_lev, std::size_t psize, int i) const
Definition AMReX_ParticleCommunication.H:569
Definition AMReX_ParticleCommunication.H:27
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:29
Definition AMReX_ParticleCommunication.H:66
void resize(int gid, int tid, int lev, int size)
Definition AMReX_ParticleCommunication.cpp:27
void setNumLevels(int num_levels)
Definition AMReX_ParticleCommunication.cpp:18
Vector< std::map< TileKey, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition AMReX_ParticleCommunication.H:73
int numLevels() const
Definition AMReX_ParticleCommunication.H:88
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_levels
Definition AMReX_ParticleCommunication.H:70
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_boxes
Definition AMReX_ParticleCommunication.H:69
int numCopies(TileKey const &index, int lev) const
Definition AMReX_ParticleCommunication.H:81
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:67
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_tiles
Definition AMReX_ParticleCommunication.H:71
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_src_indices
Definition AMReX_ParticleCommunication.H:72
void clear()
Definition AMReX_ParticleCommunication.cpp:9
Definition AMReX_ParticleCommunication.H:97
Definition AMReX_ParticleCommunication.H:111
BuildWorkspace(int a_num_buckets)
Definition AMReX_ParticleCommunication.H:112
Gpu::HostVector< unsigned int > h_box_counts
Definition AMReX_ParticleCommunication.H:120
int num_buckets
Definition AMReX_ParticleCommunication.H:119
Definition AMReX_ParticleCommunication.H:95
Definition AMReX_ParticleCommunication.H:96
Definition AMReX_ParticleCommunication.H:92
Vector< int > m_rcv_box_ids
Definition AMReX_ParticleCommunication.H:366
Vector< std::size_t > m_snd_offsets
Definition AMReX_ParticleCommunication.H:393
Vector< int > m_rcv_box_counts
Definition AMReX_ParticleCommunication.H:364
Vector< std::size_t > m_snd_counts
Definition AMReX_ParticleCommunication.H:394
void finalizeBuildBoxCounts(BuildWorkspace const &workspace, bool use_host_box_counters)
Definition AMReX_ParticleCommunication.H:344
Long m_NumSnds
Definition AMReX_ParticleCommunication.H:371
void buildMPIFinish(const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.cpp:223
Vector< int > m_neighbor_procs
Definition AMReX_ParticleCommunication.H:385
void clear()
Definition AMReX_ParticleCommunication.cpp:41
Vector< int > m_rcv_box_pids
Definition AMReX_ParticleCommunication.H:368
void buildCopies(const PC &pc, const ParticleCopyOp &op, AtomicScatterAlgorithm, BuildWorkspace &, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:313
Vector< int > m_rcv_box_levs
Definition AMReX_ParticleCommunication.H:369
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, int local)
Definition AMReX_ParticleCommunication.H:408
Gpu::DeviceVector< int > d_real_comp_mask
Definition AMReX_ParticleCommunication.H:402
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:93
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition AMReX_ParticleCommunication.H:397
void forEachCopyBatch(const PC &pc, const ParticleCopyOp &op, F &&f)
Definition AMReX_ParticleCommunication.H:130
Long m_superparticle_size
Definition AMReX_ParticleCommunication.H:403
Vector< int > m_rcv_box_tids
Definition AMReX_ParticleCommunication.H:367
Vector< Long > m_Snds
Definition AMReX_ParticleCommunication.H:387
Vector< MPI_Request > m_particle_sreqs
Definition AMReX_ParticleCommunication.H:380
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_dst_indices
Definition AMReX_ParticleCommunication.H:358
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition AMReX_ParticleCommunication.H:360
Vector< Long > m_Rcvs
Definition AMReX_ParticleCommunication.H:388
Vector< int > m_rcv_box_offsets
Definition AMReX_ParticleCommunication.H:365
Vector< std::size_t > m_snd_pad_correction_h
Definition AMReX_ParticleCommunication.H:396
Vector< MPI_Status > m_particle_sstats
Definition AMReX_ParticleCommunication.H:379
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition AMReX_ParticleCommunication.H:362
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition AMReX_ParticleCommunication.H:400
Vector< std::size_t > m_rOffset
Definition AMReX_ParticleCommunication.H:390
Vector< MPI_Status > m_particle_rstats
Definition AMReX_ParticleCommunication.H:376
Vector< Long > m_snd_num_particles
Definition AMReX_ParticleCommunication.H:382
Vector< MPI_Request > m_particle_rreqs
Definition AMReX_ParticleCommunication.H:377
Long superParticleSize() const
Definition AMReX_ParticleCommunication.H:405
Gpu::HostVector< int > m_rcv_data
Definition AMReX_ParticleCommunication.H:391
void buildCopies(const PC &pc, const ParticleCopyOp &op, StableOrderedAlgorithm, BuildWorkspace &workspace, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:153
Vector< MPI_Status > m_build_stats
Definition AMReX_ParticleCommunication.H:373
Vector< int > m_RcvProc
Definition AMReX_ParticleCommunication.H:389
Vector< std::size_t > m_rcv_pad_correction_h
Definition AMReX_ParticleCommunication.H:399
int m_nrcvs
Definition AMReX_ParticleCommunication.H:372
Gpu::HostVector< unsigned int > m_box_counts_h
Definition AMReX_ParticleCommunication.H:361
Vector< MPI_Request > m_build_rreqs
Definition AMReX_ParticleCommunication.H:374
Vector< Long > m_rcv_num_particles
Definition AMReX_ParticleCommunication.H:383
Gpu::DeviceVector< int > d_int_comp_mask
Definition AMReX_ParticleCommunication.H:402
Definition AMReX_ParticleCommunication.H:42
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:44