1 """CUDA code generation for vector expressions."""
2
3 from __future__ import division
4
5 __copyright__ = "Copyright (C) 2008 Andreas Kloeckner"
6
7 __license__ = """
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see U{http://www.gnu.org/licenses/}.
20 """
21
22
23
24
25 import numpy
26 import pycuda.driver as drv
27 import pycuda.gpuarray as gpuarray
28 import pycuda.elementwise
29 from hedge.backends.vector_expr import CompiledVectorExpressionBase
30
31
32
33
35 elementwise_mod = pycuda.elementwise
36
37 - def __init__(self, vec_expr_info_list, result_dtype_getter,
38 stream=None, allocator=drv.mem_alloc):
44
46 from pycuda.elementwise import get_elwise_kernel
47 return get_elwise_kernel(args, instructions, name="vector_expression")
48
def __call__(self, evaluate_subexpr, stats_callback=None):
    """Evaluate the compiled vector expression(s) on the GPU.

    :param evaluate_subexpr: callable invoked once for every entry of
        ``self.vector_deps`` and ``self.scalar_deps``; it returns the
        value of that dependency. Vector values are accessed through
        ``shape``, ``dtype``, ``gpudata`` and ``_grid``; scalar values
        through ``dtype`` and are passed to the kernel as-is.
    :param stats_callback: if not *None*, the kernel is launched with
        ``prepared_timed_call`` and ``stats_callback(size, self, ret)``
        is invoked, where *ret* is that call's return value. Otherwise
        the kernel is launched with ``prepared_async_call`` on
        ``self.stream``.
    :returns: a list with one newly allocated GPU array per entry of
        ``self.result_vec_expr_info_list``.
    """
    vectors = [evaluate_subexpr(vec_expr)
            for vec_expr in self.vector_deps]
    scalars = [evaluate_subexpr(scal_expr)
            for scal_expr in self.scalar_deps]

    # All vector operands are expected to share one shape; that shape is
    # also used for every result array.
    from pytools import single_valued
    shape = single_valued(vec.shape for vec in vectors)

    # The kernel is looked up by the dtypes of the actual operands.
    kernel_rec = self.get_kernel(
            tuple(v.dtype for v in vectors),
            tuple(s.dtype for s in scalars))

    # One output array per result expression, all of the same shape
    # and dtype.
    results = [gpuarray.empty(
        shape, kernel_rec.result_dtype, self.allocator)
        for _ in self.result_vec_expr_info_list]

    size = results[0].size
    kernel_rec.kernel.set_block_shape(*results[0]._block)

    # Kernel argument order: result buffers, vector operand buffers,
    # scalar operands, then the element count.
    args = ([r.gpudata for r in results]
            + [v.gpudata for v in vectors]
            + scalars
            + [size])

    if stats_callback is not None:
        # The timed launch takes no stream argument.
        stats_callback(size, self,
                kernel_rec.kernel.prepared_timed_call(
                    vectors[0]._grid, *args))
    else:
        kernel_rec.kernel.prepared_async_call(
                vectors[0]._grid, self.stream, *args)

    return results
81
82
83
84
if __name__ == "__main__":
    # Manual smoke test: compile 2*x+3*y+4*z and evaluate it on three
    # small GPU vectors. Importing pycuda.autoinit sets up a CUDA context.
    import pycuda.autoinit
    from pymbolic import parse, var

    test_dtype = numpy.float32

    expr = parse("2*x+3*y+4*z")
    print(expr)

    # NOTE(review): the __init__ signature in this file is
    # (vec_expr_info_list, result_dtype_getter, stream=None, allocator=...),
    # so the third positional argument below binds to `stream` -- confirm
    # that this call still matches the constructor.
    cexpr = CompiledVectorExpression(
            expr,
            lambda subexpr: (True, test_dtype),
            test_dtype)

    # Map each variable of the expression to a length-5 device vector.
    ctx = dict(
            (var(name), gpuarray.arange(5, dtype=test_dtype))
            for name in ("x", "y", "z"))

    print(cexpr(lambda subexpr: ctx[subexpr]))
104