-
Notifications
You must be signed in to change notification settings - Fork 427
File metadata and controls
- Code
- Blame
69 lines (56 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
use Time;
// ---- Execution-mode and reporting switches (all off by default) ----------
// As `config const`s these can be overridden on the command line.

// true: process the data in taskSize-sized chunks claimed by numTasks tasks;
// false: process the whole array in one go.
config const multitask: bool = false;

// Print the full output array after the run.
config const printOutput: bool = false;

// Print the sum of the output array after the run.
config const printSum: bool = false;

// Print elapsed time and the derived throughput figure after the run.
config const printTimes: bool = false;

// ---- Problem-size parameters ---------------------------------------------

// Number of concurrent tasks spawned in multitask mode.
config const numTasks: int = 2;

// Number of elements per chunk in multitask mode.
config const taskSize: int = 100;

// Total number of elements in the input and output arrays.
config const n: int = 1000;

// Iteration count of the inner loop executed for every element in kernel().
config const reps: int = 10;

// Number of whole taskSize-sized chunks that fit in n (integer division, so
// any remainder is not counted here).
const numChunks: int = n / taskSize;

// Shared counter from which tasks draw the id of the next chunk to process.
var curChunk: atomic int;

// C printf binding; not referenced in the visible part of this file.
extern proc printf(s...);

// Input and output arrays, declared at module scope (outside the
// `on here.gpus[0]` block).  Every input element starts at 2.0; the output
// keeps the default initial value until results are copied into it.
var HostIn: [0..#n] real = 2.0;
var HostOut: [0..#n] real;
// Element-wise compute kernel.
//
// For each pair of corresponding elements in MyIn and MyOut, accumulates
// (input + sqrt(i)) for i = 0, 1, ..., reps-1 and stores the total in the
// output element.  MyIn is only read; MyOut is overwritten.
//
// The `@assertOnGpu` attribute on the foreach asks the compiler/runtime to
// reject the loop if it cannot be executed as a GPU kernel.
inline proc kernel(ref MyIn, ref MyOut) {
  @assertOnGpu
  foreach (src, dst) in zip(MyIn, MyOut) {
    var acc = 0.0;
    for step in 0..#reps {
      // Same operation order as a single `acc += src + sqrt(step)`.
      const term = src + sqrt(step);
      acc += term;
    }
    dst = acc;
  }
}
// Timed section: everything between start() and stop() is measured, i.e. the
// creation of the buffers declared inside the `on here.gpus[0]` block, the
// copies from HostIn and back into HostOut, and the kernel launches.
var t: stopwatch;
t.start();
on here.gpus[0] {
  if multitask {
    // numChunks is n/taskSize rounded down, so on its own it silently skips
    // the last (n % taskSize) elements -- and skips everything when
    // taskSize > n.  Schedule one additional, shorter chunk in that case.
    const totalChunks = numChunks + (if n % taskSize != 0 then 1 else 0);

    coforall tid in 0..#numTasks {
      // Per-task staging buffers, reused for every chunk this task claims.
      var MyIn, MyOut: [0..#taskSize] real;
      while true {
        // fetchAdd returns the previous counter value, so each chunk id is
        // handed to exactly one task.
        const myChunkId = curChunk.fetchAdd(1);
        if myChunkId >= totalChunks then break;

        const chunkStart = myChunkId*taskSize;
        // Only the final chunk can be shorter than taskSize.
        const chunkLen = min(taskSize, n - chunkStart);
        const myChunk = chunkStart..#chunkLen;

        if chunkLen == taskSize {
          // Full chunk: whole-buffer copies, as in the original code path.
          MyIn = HostIn[myChunk];
          kernel(MyIn, MyOut);
          HostOut[myChunk] = MyOut;
        } else {
          // Trailing partial chunk: fill and copy back only the valid
          // prefix.  The kernel still runs over the whole buffer; the
          // results computed for the stale tail are simply never copied out.
          MyIn[0..#chunkLen] = HostIn[myChunk];
          kernel(MyIn, MyOut);
          HostOut[myChunk] = MyOut[0..#chunkLen];
        }
      }
    }
  }
  else {
    // Single-task path: one buffer pair covering all n elements.
    var MyIn, MyOut: [0..#n] real;
    MyIn = HostIn;
    kernel(MyIn, MyOut);
    HostOut = MyOut;
  }
}
t.stop();
// ---- Optional reporting, controlled by the print* config constants --------

// Dump the complete output array.
if printOutput {
  writeln(HostOut);
}

// Sequentially add up the output elements, in index order, and print the total.
if printSum {
  var total = 0.0;
  for idx in HostOut.domain {
    total += HostOut[idx];
  }
  writeln("Sum = ", total);
}

// Report the measured time and the throughput derived from it.  The figure
// counts n*8 bytes in total (8 bytes per 64-bit `real` element).
if printTimes {
  // The stopwatch has already been stopped, so one reading serves both lines.
  const seconds = t.elapsed();
  writeln("Total (s): ", seconds);
  writeln("Throughput (GB/s): ", (n*8)/seconds/1e9);
}