Package hedge :: Package backends :: Package cuda :: Module vector_expr
[hide private]
[frames] | no frames]

Source Code for Module hedge.backends.cuda.vector_expr

  1  """CUDA code generation for vector expressions.""" 
  2   
  3  from __future__ import division 
  4   
  5  __copyright__ = "Copyright (C) 2008 Andreas Kloeckner" 
  6   
  7  __license__ = """ 
  8  This program is free software: you can redistribute it and/or modify 
  9  it under the terms of the GNU General Public License as published by 
 10  the Free Software Foundation, either version 3 of the License, or 
 11  (at your option) any later version. 
 12   
 13  This program is distributed in the hope that it will be useful, 
 14  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  GNU General Public License for more details. 
 17   
 18  You should have received a copy of the GNU General Public License 
 19  along with this program.  If not, see U{http://www.gnu.org/licenses/}. 
 20  """ 
 21   
 22   
 23   
 24   
 25  import numpy 
 26  import pycuda.driver as drv 
 27  import pycuda.gpuarray as gpuarray 
 28  import pycuda.elementwise 
 29  from hedge.backends.vector_expr import CompiledVectorExpressionBase 
 30   
 31   
 32   
 33   
class CompiledVectorExpression(CompiledVectorExpressionBase):
    """Vector expression evaluated on the GPU through a PyCUDA
    elementwise kernel.

    Kernel lookup (C{get_kernel}) and the dependency lists
    (C{vector_deps}, C{scalar_deps}, C{result_vec_expr_info_list}) are
    not defined in this class; they are expected to be provided by
    L{CompiledVectorExpressionBase}.
    """

    # Module providing the elementwise code generation helpers.
    # NOTE(review): not read anywhere in this class; presumably consumed
    # by the base class -- confirm against CompiledVectorExpressionBase.
    elementwise_mod = pycuda.elementwise

    def __init__(self, vec_expr_info_list, result_dtype_getter,
            stream=None, allocator=drv.mem_alloc):
        """
        @arg vec_expr_info_list: forwarded unchanged to the base class.
        @arg result_dtype_getter: forwarded unchanged to the base class.
        @arg stream: passed as the stream argument of
          C{prepared_async_call} when the kernel is launched without
          a statistics callback.
        @arg allocator: passed to C{gpuarray.empty} when allocating the
          result vectors.
        """
        CompiledVectorExpressionBase.__init__(self,
                vec_expr_info_list, result_dtype_getter)

        self.stream = stream
        self.allocator = allocator

    def make_kernel_internal(self, args, instructions):
        """Return the PyCUDA elementwise kernel named
        C{"vector_expression"} built from I{args} and I{instructions}.
        """
        from pycuda.elementwise import get_elwise_kernel
        return get_elwise_kernel(args, instructions, name="vector_expression")

    def __call__(self, evaluate_subexpr, stats_callback=None):
        """Evaluate the compiled expression and return the result vectors.

        @arg evaluate_subexpr: callable invoked once for every entry of
          C{self.vector_deps} and C{self.scalar_deps}. Vector values must
          expose C{shape}, C{dtype}, C{gpudata} and C{_grid}; scalar
          values must expose C{dtype}.
        @arg stats_callback: if not C{None}, the kernel is launched with
          C{prepared_timed_call} and the callback is invoked as
          C{stats_callback(size, self, timed_call_result)}. Otherwise the
          kernel is launched with C{prepared_async_call} on
          C{self.stream}.
        @return: a list of newly allocated GPU arrays, one per entry of
          C{self.result_vec_expr_info_list}.
        """
        vectors = [evaluate_subexpr(vec_expr)
                for vec_expr in self.vector_deps]
        scalars = [evaluate_subexpr(scal_expr)
                for scal_expr in self.scalar_deps]

        # All vector operands are required to share one shape, which is
        # also used for every result array.
        from pytools import single_valued
        shape = single_valued(vec.shape for vec in vectors)

        kernel_rec = self.get_kernel(
                tuple(v.dtype for v in vectors),
                tuple(s.dtype for s in scalars))

        # One output array per result expression; the expression itself is
        # not needed for the allocation.
        results = [gpuarray.empty(
            shape, kernel_rec.result_dtype, self.allocator)
            for _ in self.result_vec_expr_info_list]

        size = results[0].size
        kernel_rec.kernel.set_block_shape(*results[0]._block)

        # Kernel argument order: result buffers, vector operand buffers,
        # scalar operands, element count.
        args = ([r.gpudata for r in results]
                + [v.gpudata for v in vectors]
                + scalars
                + [size])

        grid = vectors[0]._grid
        if stats_callback is not None:
            stats_callback(size, self,
                    kernel_rec.kernel.prepared_timed_call(grid, *args))
        else:
            kernel_rec.kernel.prepared_async_call(grid, self.stream, *args)

        return results




if __name__ == "__main__":
    # Ad-hoc smoke test: compile "2*x+3*y+4*z" and evaluate it on three
    # small GPU vectors.
    test_dtype = numpy.float32

    # Imported for its side effect of setting up a CUDA context.
    import pycuda.autoinit
    from pymbolic import parse, var

    expr = parse("2*x+3*y+4*z")
    print(expr)

    # NOTE(review): the third positional argument binds to the `stream`
    # parameter of CompiledVectorExpression.__init__, so a dtype is being
    # passed as the stream here. This call looks like it targets an older
    # constructor signature -- confirm before relying on this script.
    cexpr = CompiledVectorExpression(
            expr,
            lambda _expr: (True, test_dtype),
            test_dtype)

    # Map each variable of the expression to a 5-element GPU vector.
    ctx = dict(
            (var(name), gpuarray.arange(5, dtype=test_dtype))
            for name in ("x", "y", "z"))

    def lookup(subexpr):
        return ctx[subexpr]

    print(cexpr(lookup))