-
Notifications
You must be signed in to change notification settings - Fork 427
File metadata and controls
- Code
- Blame
41 lines (32 loc) · 1001 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Data is created on Locales[0]; chunks are copied to each locale and then to its GPUs.
use GpuDiagnostics;
// Computes A = B + alpha*C by splitting the index space first across
// locales and then across each locale's GPUs, while GPU diagnostics
// are being collected.

// Number of elements in each of A, B and C (overridable on the command line).
config const n = 128;
// Scalar multiplier applied to C in A = B + alpha*C.
config const alpha = 5;
// All three arrays are declared here at module level, over 0..<n.
var A, B, C: [0..<n] int;
// Fill the inputs with constants: every B[i] is 1, every C[i] is 2.
B = 1;
C = 2;
// Diagnostics are collected from here until stopGpuDiagnostics() below.
startGpuDiagnostics();
// One task per locale, each executing on its own locale. A is passed by
// `ref` so the tasks can write their results back into it.
coforall (l,lid) in zip(Locales, LocaleSpace) with (ref A) do on l {
// Integer division: NOTE(review) if n is not a multiple of numLocales,
// the trailing n % numLocales indices are not covered by any chunk.
const perLocSize = n/numLocales;
const locStart = lid*perLocSize;
// Counted range: perLocSize indices starting at locStart.
const locChunk = locStart..#perLocSize;
// Per-locale result buffer, indexed by this locale's chunk.
var Al: [locChunk] int;
// Per-locale copies of this locale's slice of the inputs.
var Bl = B[locChunk], Cl = C[locChunk];
const numGPUs = here.gpus.size;
// One task per GPU sublocale of this locale, each executing on its GPU.
coforall (g,gid) in zip(here.gpus, here.gpus.domain) with (ref A) do on g {
// Integer division again: NOTE(review) any remainder of
// perLocSize % numGPUs is likewise left unprocessed.
const perGPUSize = perLocSize/numGPUs;
const gpuStart = locStart+gid*perGPUSize;
const gpuChunk = gpuStart..#perGPUSize;
// Per-GPU result buffer and per-GPU copies of the input slices.
var Ag: [gpuChunk] int;
var Bg = Bl[gpuChunk], Cg = Cl[gpuChunk];
// The actual computation. NOTE(review): presumably this promoted
// expression is the single kernel launch per GPU that the validation
// expects; keep it as one statement so the count stays unchanged.
Ag = Bg + alpha * Cg;
// Copy this GPU's results into the per-locale buffer.
Al[gpuChunk] = Ag;
}
// Copy this locale's results back into the module-level array A.
A[locChunk] = Al;
}
stopGpuDiagnostics();
// validation
param nLaunch = 1;  // expected number of kernel launches per GPU
for loc in Locales {
  // The launch count recorded for a locale must equal the per-GPU
  // expectation multiplied by the number of GPUs on that locale.
  const observedLaunches = getGpuDiagnostics()[loc.id].kernel_launch;
  const expectedLaunches = nLaunch * loc.gpus.size;
  assert(observedLaunches == expectedLaunches);
}