Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuUtility.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_UTILITY_H_
2#define AMREX_GPU_UTILITY_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_GpuTypes.H>
7#include <AMReX_GpuControl.H>
8#include <AMReX_GpuDevice.H>
9#include <AMReX_Extension.H>
10#include <AMReX_REAL.H>
11#include <AMReX_INT.H>
12#include <AMReX_Array.H>
13#include <AMReX_Array4.H>
14#include <iosfwd>
15#include <cmath>
16#include <cstring>
17
18#ifdef AMREX_USE_CUDA
19#include <cuda.h>
20#include <curand_kernel.h> // Is this needed here?
21#endif
22
23namespace amrex {
24namespace Gpu {
25
26 template <typename T>
28 T LDG (Array4<T> const& a, int i, int j, int k) noexcept {
29#if defined(AMREX_USE_CUDA)
30 AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));))
31 AMREX_IF_ON_HOST((return a(i,j,k);))
32#else
33 return a(i,j,k);
34#endif
35 }
36
37 template <typename T>
39 T LDG (Array4<T> const& a, int i, int j, int k, int n) noexcept {
40#if defined(AMREX_USE_CUDA)
41 AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));))
42 AMREX_IF_ON_HOST((return a(i,j,k,n);))
43#else
44 return a(i,j,k,n);
45#endif
46 }
47
48 inline bool isManaged (void const* p) noexcept {
49#if defined(AMREX_USE_HIP)
50 hipPointerAttribute_t attrib;
51 hipError_t r = hipPointerGetAttributes(&attrib, p);
52#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
53 return r == hipSuccess && attrib.memoryType == hipMemoryTypeManaged;
54#else
55 return r == hipSuccess && attrib.type == hipMemoryTypeManaged;
56#endif
57#elif defined(AMREX_USE_CUDA)
58 CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_IS_MANAGED;
59 unsigned int is_managed = 0;
60 void* data[] = { (void*)(&is_managed) };
61 CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
62 return r == CUDA_SUCCESS && is_managed;
63#elif defined(AMREX_USE_SYCL)
64 auto type = sycl::get_pointer_type(p,Device::syclContext());
65 return type == sycl::usm::alloc::shared;
66#else
68 return false;
69#endif
70 }
71
72 inline bool isDevicePtr (void const* p) noexcept {
73#if defined(AMREX_USE_HIP)
74 hipPointerAttribute_t attrib;
75 hipError_t r = hipPointerGetAttributes(&attrib, p);
76#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
77 return r == hipSuccess && attrib.memoryType == hipMemoryTypeDevice;
78#else
79 return r == hipSuccess && attrib.type == hipMemoryTypeDevice;
80#endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
81#elif defined(AMREX_USE_CUDA)
82 CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
83 CUmemorytype mem_type = static_cast<CUmemorytype>(0);
84 void* data[] = { (void*)(&mem_type) };
85 CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
86 return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_DEVICE;
87#elif defined(AMREX_USE_SYCL)
88 auto type = sycl::get_pointer_type(p,Device::syclContext());
89 return type == sycl::usm::alloc::device;
90#else
92 return false;
93#endif
94 }
95
96 inline bool isPinnedPtr (void const* p) noexcept {
97#if defined(AMREX_USE_HIP)
98 hipPointerAttribute_t attrib;
99 hipError_t r = hipPointerGetAttributes(&attrib, p);
100#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
101 return r == hipSuccess && attrib.memoryType == hipMemoryTypeHost;
102#else
103 return r == hipSuccess && attrib.type == hipMemoryTypeHost;
104#endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
105#elif defined(AMREX_USE_CUDA)
106 CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
107 CUmemorytype mem_type = static_cast<CUmemorytype>(0);
108 void* data[] = { (void*)(&mem_type) };
109 CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
110 return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_HOST;
111#elif defined(AMREX_USE_SYCL)
112 auto type = sycl::get_pointer_type(p,Device::syclContext());
113 return type == sycl::usm::alloc::host;
114#else
116 return false;
117#endif
118 }
119
120 inline bool isGpuPtr (void const* p) noexcept {
121#if defined(AMREX_USE_HIP)
122 hipPointerAttribute_t attrib;
123 hipError_t r = hipPointerGetAttributes(&attrib, p);
124 if (r != hipSuccess) { return false; }
125#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
126 auto t = attrib.memoryType;
127#else
128 auto t = attrib.type;
129#endif
130 return (t == hipMemoryTypeHost ||
131 t == hipMemoryTypeDevice ||
132 t == hipMemoryTypeArray ||
133 t == hipMemoryTypeUnified ||
134 t == hipMemoryTypeManaged);
135#elif defined(AMREX_USE_CUDA)
136 CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
137 CUmemorytype mem_type = static_cast<CUmemorytype>(0);
138 void* data[] = { (void*)(&mem_type) };
139 CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
140 return r == CUDA_SUCCESS &&
141 (mem_type == CU_MEMORYTYPE_HOST ||
142 mem_type == CU_MEMORYTYPE_DEVICE ||
143 mem_type == CU_MEMORYTYPE_ARRAY ||
144 mem_type == CU_MEMORYTYPE_UNIFIED);
145#elif defined(AMREX_USE_SYCL)
146 auto type = sycl::get_pointer_type(p,Device::syclContext());
147 return type != sycl::usm::alloc::unknown;
148#else
150 return false;
151#endif
152 }
153
154 template <class T>
156 bool isnan (T m) noexcept
157 {
158#if defined(_WIN32)
159 AMREX_IF_ON_DEVICE((return m != m;))
160 AMREX_IF_ON_HOST((return std::isnan(m);))
161#elif defined(AMREX_USE_SYCL)
162 return sycl::isnan(m);
163#else
164 return std::isnan(m);
165#endif
166 }
167
168 template <class T>
170 bool isinf (T m) noexcept
171 {
172#if defined(_WIN32)
173 AMREX_IF_ON_DEVICE((return (2*m == m) && (m != 0);))
174 AMREX_IF_ON_HOST((return std::isinf(m);))
175#elif defined(AMREX_USE_SYCL)
176 return sycl::isinf(m);
177#else
178 return std::isinf(m);
179#endif
180 }
181
183 {
185 StreamItInfo () noexcept
186 : device_sync(!Gpu::inNoSyncRegion()) {}
188 device_sync = false;
189 return *this;
190 }
191 };
192
194 {
195 public:
196 StreamIter (int n, bool is_thread_safe=true) noexcept;
197 StreamIter (int n, const StreamItInfo& info, bool is_thread_safe=true) noexcept;
198
199 ~StreamIter ();
200
201 StreamIter (StreamIter const&) = delete;
202 StreamIter (StreamIter &&) = delete;
203 void operator= (StreamIter const&) = delete;
204 void operator= (StreamIter &&) = delete;
205
206 int operator() () const noexcept { return m_i; }
207
208 [[nodiscard]] bool isValid () const noexcept { return m_i < m_n; }
209
210#if !defined(AMREX_USE_GPU)
211 void operator++ () noexcept { ++m_i; }
212#else
213 void operator++ ();
214#endif
215
216 private:
217 void init () noexcept; // NOLINT
218
219 int m_n;
220 int m_i;
221 bool m_threadsafe;
222 bool m_sync;
223 };
224
226void* memcpy (void* dest, const void* src, std::size_t count)
227{
228#if defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP)
229 return ::memcpy(dest, src, count);
230#else
231 return std::memcpy(dest, src, count);
232#endif
233}
234
235} // namespace Gpu
236
237#ifdef AMREX_USE_GPU
238std::ostream& operator<< (std::ostream& os, const dim3& d);
239#endif
240
241using Gpu::isnan;
242using Gpu::isinf;
243
244} // namespace amrex
245
246#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
247/* Somehow I cannot get cudaMemcpyFromSymbol to work. */
248#define AMREX_GET_DEVICE_FUNC_PTR(FP_t, sym) \
249 [] () { \
250 auto* amrex_i_hp = (FP_t*) amrex::The_Pinned_Arena()->alloc(sizeof(FP_t)); \
251 amrex::single_task([=] AMREX_GPU_DEVICE () { *amrex_i_hp = sym; }); \
252 amrex::Gpu::streamSynchronize(); \
253 auto amrex_i_r = *amrex_i_hp; \
254 amrex::The_Pinned_Arena()->free(amrex_i_hp); \
255 return amrex_i_r; \
256 } ()
257#elif !defined(AMREX_USE_SYCL)
258#define AMREX_GET_DEVICE_FUNC_PTR(FP_t, sym) sym
259#endif
260
261#endif
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_GpuUtility.H:194
int operator()() const noexcept
Definition AMReX_GpuUtility.H:206
~StreamIter()
Definition AMReX_GpuUtility.cpp:65
bool isValid() const noexcept
Definition AMReX_GpuUtility.H:208
void operator=(StreamIter const &)=delete
StreamIter(StreamIter &&)=delete
StreamIter(StreamIter const &)=delete
void operator++()
Definition AMReX_GpuUtility.cpp:81
bool isManaged(void const *p) noexcept
Definition AMReX_GpuUtility.H:48
__host__ __device__ T LDG(Array4< T > const &a, int i, int j, int k) noexcept
Definition AMReX_GpuUtility.H:28
bool isGpuPtr(void const *p) noexcept
Definition AMReX_GpuUtility.H:120
__host__ __device__ bool isnan(T m) noexcept
Definition AMReX_GpuUtility.H:156
bool isPinnedPtr(void const *p) noexcept
Definition AMReX_GpuUtility.H:96
bool inNoSyncRegion() noexcept
Definition AMReX_GpuControl.H:148
__host__ __device__ void * memcpy(void *dest, const void *src, std::size_t count)
Definition AMReX_GpuUtility.H:226
__host__ __device__ bool isinf(T m) noexcept
Definition AMReX_GpuUtility.H:170
bool isDevicePtr(void const *p) noexcept
Definition AMReX_GpuUtility.H:72
Definition AMReX_Amr.cpp:50
std::ostream & operator<<(std::ostream &os, AmrMesh const &amr_mesh)
Stream helper; forwards to the friend declared inside AmrMesh.
Definition AMReX_AmrMesh.cpp:1306
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
A multidimensional array accessor.
Definition AMReX_Array4.H:283
Definition AMReX_GpuUtility.H:183
StreamItInfo() noexcept
Definition AMReX_GpuUtility.H:185
StreamItInfo & DisableDeviceSync() noexcept
Definition AMReX_GpuUtility.H:187
bool device_sync
Definition AMReX_GpuUtility.H:184