// AMReX — Block-Structured AMR Software Framework
// AMReX_GpuLaunch.nolint.H
1
// Do not include this header anywhere other than AMReX_GpuLaunch.H.
2
// The purpose of this file is to avoid clang-tidy.
3
4
#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
5
#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
6
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
7
AMREX_WRONG_NUM_ARGS, \
8
AMREX_WRONG_NUM_ARGS, \
9
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2, \
10
AMREX_WRONG_NUM_ARGS, \
11
AMREX_WRONG_NUM_ARGS, \
12
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE, \
13
AMREX_WRONG_NUM_ARGS, \
14
AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
15
16
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
17
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
18
AMREX_WRONG_NUM_ARGS, \
19
AMREX_WRONG_NUM_ARGS, \
20
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2, \
21
AMREX_WRONG_NUM_ARGS, \
22
AMREX_WRONG_NUM_ARGS, \
23
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE, \
24
AMREX_WRONG_NUM_ARGS, \
25
AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
26
27
#if (AMREX_SPACEDIM == 1)
28
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (a1,a2,a3)
29
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(a1,a2,a3)
30
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(fl,a1,a2,a3)
31
#elif (AMREX_SPACEDIM == 2)
32
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (a1,a2,a3,b1,b2,b3)
33
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(a1,a2,a3,b1,b2,b3)
34
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(fl,a1,a2,a3,b1,b2,b3)
35
#elif (AMREX_SPACEDIM == 3)
36
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (__VA_ARGS__)
37
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
38
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
39
#endif
40
41
#ifdef AMREX_USE_GPU
42
43
#ifndef AMREX_USE_SYCL
44
45
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
46
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
47
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
48
{ \
49
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
50
block \
51
); \
52
} \
53
else { \
54
AMREX_PRAGMA_SIMD \
55
for (amrex_i_inttype i = 0; i < n; ++i) { \
56
block \
57
} \
58
}}
59
60
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
61
{ \
62
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
63
{ \
64
amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
65
block \
66
); \
67
} \
68
else { \
69
amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
70
block \
71
); \
72
} \
73
}
74
75
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
76
{ \
77
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
78
{ \
79
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
80
block \
81
); \
82
} \
83
else { \
84
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
85
block \
86
); \
87
} \
88
}
89
90
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
91
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
92
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
93
{ \
94
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
95
block \
96
); \
97
} \
98
else { \
99
for (amrex_i_inttype i = 0; i < n; ++i) { \
100
block \
101
} \
102
}}
103
104
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
105
{ \
106
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
107
{ \
108
amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
109
block \
110
); \
111
} \
112
else { \
113
amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
114
block \
115
); \
116
} \
117
}
118
119
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
120
{ \
121
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
122
{ \
123
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
124
block \
125
); \
126
} \
127
else { \
128
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
129
block \
130
); \
131
} \
132
}
133
134
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
135
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
136
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
137
{ \
138
amrex::launch(box, [=] AMREX_GPU_DEVICE (std::decay_t<decltype(box)> const& tbox) { block }); \
139
} else { \
140
auto tbox = box; \
141
block; \
142
}
143
#else
144
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
145
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
146
{ \
147
AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
148
} else { \
149
auto tbox = box; \
150
block; \
151
}
152
#endif
153
154
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
155
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
156
{ \
157
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
158
} else { \
159
auto tbx1 = bx1; \
160
block1; \
161
}
162
163
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
164
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
165
{ \
166
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
167
} else { \
168
auto tbx1 = bx1; \
169
auto tbx2 = bx2; \
170
block1; \
171
block2; \
172
}
173
174
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
175
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
176
{ \
177
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
178
} else { \
179
auto tbx1 = bx1; \
180
auto tbx2 = bx2; \
181
auto tbx3 = bx3; \
182
block1; \
183
block2; \
184
block3; \
185
}
186
187
#else
188
// xxxxx SYCL todo: host disabled in host device
189
190
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
191
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
192
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
193
{ \
194
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
195
block \
196
); \
197
} \
198
else { \
199
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
200
}}
201
202
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
203
{ \
204
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
205
{ \
206
amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
207
block \
208
); \
209
} \
210
else { \
211
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
212
} \
213
}
214
215
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
216
{ \
217
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
218
{ \
219
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
220
block \
221
); \
222
} \
223
else { \
224
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
225
} \
226
}
227
228
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
229
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
230
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
231
{ \
232
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
233
block \
234
); \
235
} \
236
else { \
237
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
238
}}
239
240
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
241
{ \
242
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
243
{ \
244
amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
245
block \
246
); \
247
} \
248
else { \
249
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
250
} \
251
}
252
253
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
254
{ \
255
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
256
{ \
257
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
258
block \
259
); \
260
} \
261
else { \
262
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
263
} \
264
}
265
266
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
267
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
268
{ \
269
AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
270
} else { \
271
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
272
}
273
274
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
275
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
276
{ \
277
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
278
} else { \
279
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
280
}
281
282
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
283
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
284
{ \
285
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
286
} else { \
287
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
288
}
289
290
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
291
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
292
{ \
293
AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
294
} else { \
295
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
296
}
297
298
#endif
299
300
#else
301
302
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
303
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
304
amrex::ignore_unused(where_to_run); \
305
AMREX_PRAGMA_SIMD \
306
for (amrex_i_inttype i = 0; i < n; ++i) { \
307
block \
308
}}
309
310
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
311
{ \
312
amrex::ignore_unused(where_to_run); \
313
amrex::LoopConcurrentOnCpu(box, [&] (int i, int j, int k) noexcept \
314
block \
315
); \
316
}
317
318
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
319
{ \
320
amrex::ignore_unused(where_to_run); \
321
amrex::LoopConcurrentOnCpu(box, nc, [&] (int i, int j, int k, int n) noexcept \
322
block \
323
); \
324
}
325
326
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
327
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
328
amrex::ignore_unused(where_to_run); \
329
for (amrex_i_inttype i = 0; i < n; ++i) { \
330
block \
331
}}
332
333
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
334
{ \
335
amrex::ignore_unused(where_to_run); \
336
amrex::LoopOnCpu(box, [&] (int i, int j, int k) noexcept \
337
block \
338
); \
339
}
340
341
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
342
{ \
343
amrex::ignore_unused(where_to_run); \
344
amrex::LoopOnCpu(box, nc, [&] (int i, int j, int k, int n) noexcept \
345
block \
346
); \
347
}
348
349
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
350
amrex::ignore_unused(where_to_run); \
351
{ \
352
auto tbox = box; \
353
block; \
354
}
355
356
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
357
amrex::ignore_unused(where_to_run); \
358
{ \
359
auto tbx1 = bx1; \
360
block1; \
361
}
362
363
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
364
amrex::ignore_unused(where_to_run); \
365
{ \
366
auto tbx1 = bx1; \
367
auto tbx2 = bx2; \
368
block1; \
369
block2; \
370
}
371
372
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
373
amrex::ignore_unused(where_to_run); \
374
{ \
375
auto tbx1 = bx1; \
376
auto tbx2 = bx2; \
377
auto tbx3 = bx3; \
378
block1; \
379
block2; \
380
block3; \
381
}
382
383
#endif
// Path: Src/Base/AMReX_GpuLaunch.nolint.H