Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuLaunch.nolint.H
Go to the documentation of this file.
// Do not include this header anywhere other than AMReX_GpuLaunch.H.
// The purpose of this file is to avoid clang-tidy.

// Argument-counting dispatcher: expands to its 10th argument (NAME).
// The launch macros below pass the user's arguments followed by nine
// candidate macro names, so NAME ends up being the implementation that
// matches the number of user arguments (9, 6, or 3).
#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME

// Launch device lambda(s) over one, two, or three box ranges. Each range
// contributes a (box, threadbox-name, body) triple, so only 3, 6, or 9
// arguments are valid; any other count selects AMREX_WRONG_NUM_ARGS,
// which produces a compile-time error.
#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
    AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
15
// Host/device counterpart of AMREX_LAUNCH_DEVICE_LAMBDA: same 3/6/9
// argument-count dispatch, but forwards to the HOST_DEVICE range macros.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE, \
    AMREX_WRONG_NUM_ARGS, \
    AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
26
// Dimension-aware launch macros. The caller always supplies three
// (box, threadbox, body) triples (plus a leading run-on flag for the
// *_FLAG variant); only the first AMREX_SPACEDIM triple(s) are actually
// used — the rest are discarded at 1D/2D, and all are forwarded at 3D.
#if (AMREX_SPACEDIM == 1)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(fl,a1,a2,a3)
#elif (AMREX_SPACEDIM == 2)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(fl,a1,a2,a3,b1,b2,b3)
#elif (AMREX_SPACEDIM == 3)
// At 3D every argument is used, so a simple variadic forward suffices.
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
#endif
40
#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_SYCL

// Run a 1D loop of n iterations either on the device (amrex::ParallelFor)
// or on the host (SIMD loop), depending on where_to_run and on whether
// GPU launches are currently enabled (Gpu::inLaunchRegion()).
//   where_to_run : RunOn::Device or RunOn::Host
//   n            : trip count (its type is reused for the index)
//   i            : name of the index variable visible inside block
//   block        : loop body, supplied as a braced compound statement
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        AMREX_PRAGMA_SIMD \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}
59
// Run a 3D loop over the cells of box, on device via amrex::ParallelFor
// or on host via amrex::LoopConcurrentOnCpu. i,j,k name the cell indices
// visible inside block.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    }

// 4D variant: iterates over box cells and nc components; n names the
// component index visible inside block.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    }
89
// Same as AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG, but the host fallback
// is a plain sequential loop (no AMREX_PRAGMA_SIMD).
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}

// Same as AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG, but the host fallback
// uses amrex::LoopOnCpu (sequential) instead of LoopConcurrentOnCpu.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    }

// 4D variant of AMREX_HOST_DEVICE_FOR_3D_FLAG (box cells x nc components).
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    }
133
// Launch a single-box kernel on device, or execute block inline on host
// with tbox aliased to box. On CUDA+Windows the device path calls
// amrex::launch directly instead of going through the variadic
// AMREX_LAUNCH_DEVICE_LAMBDA dispatcher — presumably a workaround for
// MSVC's preprocessor handling of variadic macros (TODO confirm).
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::launch(box, [=] AMREX_GPU_DEVICE (std::decay_t<decltype(box)> const& tbox) { block }); \
    } else { \
        auto tbox = box; \
        block; \
    }
#else
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        auto tbox = box; \
        block; \
    }
#endif
153
// One-range launch: device path forwards to AMREX_LAUNCH_DEVICE_LAMBDA;
// host path binds tbx1 to bx1 and runs block1 inline.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        auto tbx1 = bx1; \
        block1; \
    }

// Two-range variant: on host the blocks run sequentially, in order.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

// Three-range variant: on host the blocks run sequentially, in order.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }
186
#else
// xxxxx SYCL todo: host disabled in host device
//
// SYCL build: the device paths are identical to the non-SYCL versions
// above, but every host fallback calls amrex::Abort at run time instead
// of executing the loop (see the TODO above).

// 1D host/device loop; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

// 3D host/device loop; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
    }

// 4D host/device loop; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
    }

// Non-SIMD 1D variant; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}
239
// 3D variant; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
    }

// 4D variant; host path aborts under SYCL.
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
    }

// Single-box launch; host path aborts under SYCL.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// One-range launch; host path aborts under SYCL.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// Two-range launch; host path aborts under SYCL.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// Three-range launch; host path aborts under SYCL.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#endif
299
#else
// CPU-only build (AMREX_USE_GPU not defined): there is no device path,
// so where_to_run is ignored (via amrex::ignore_unused) and every macro
// runs its host loop unconditionally. Host lambdas capture by reference
// here, unlike the by-copy device lambdas above.

// 1D loop with SIMD hint.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    AMREX_PRAGMA_SIMD \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}

// 3D loop via amrex::LoopConcurrentOnCpu.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, [&] (int i, int j, int k) noexcept \
    block \
    ); \
    }

// 4D loop via amrex::LoopConcurrentOnCpu.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, nc, [&] (int i, int j, int k, int n) noexcept \
    block \
    ); \
    }

// Plain sequential 1D loop (no SIMD hint).
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}
332
// Sequential 3D loop via amrex::LoopOnCpu.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    { \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, [&] (int i, int j, int k) noexcept \
    block \
    ); \
    }

// Sequential 4D loop via amrex::LoopOnCpu.
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    { \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, nc, [&] (int i, int j, int k, int n) noexcept \
    block \
    ); \
    }

// Single-box "launch": executes block inline with tbox aliased to box.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbox = box; \
        block; \
    }

// One-range launch: block1 runs inline with tbx1 aliased to bx1.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        block1; \
    }

// Two-range launch: both blocks run sequentially, in order.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

// Three-range launch: all three blocks run sequentially, in order.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }

#endif