Package hedge :: Package backends :: Package cuda :: Module kernelbase
[hide private]
[frames | no frames]

Source Code for Module hedge.backends.cuda.kernelbase

 1  """Interface with Nvidia CUDA.""" 
 2   
 3  from __future__ import division 
 4   
 5  __copyright__ = "Copyright (C) 2008 Andreas Kloeckner" 
 6   
 7  __license__ = """ 
 8  This program is free software: you can redistribute it and/or modify 
 9  it under the terms of the GNU General Public License as published by 
10  the Free Software Foundation, either version 3 of the License, or 
11  (at your option) any later version. 
12   
13  This program is distributed in the hope that it will be useful, 
14  but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  GNU General Public License for more details. 
17   
18  You should have received a copy of the GNU General Public License 
19  along with this program.  If not, see U{http://www.gnu.org/licenses/}. 
20  """ 
21   
22   
23   
24   
25  import numpy 
26  from pytools import memoize_method, Record 
27  import pycuda.gpuarray as gpuarray 
class DiffKernelBase(object):
    """Base class for CUDA differentiation kernels.

    Builds rst-to-xyz coefficient arrays on the host and uploads them to the
    GPU.  The methods read ``self.discr`` and, for the fake variant,
    ``self.plan``; both must be set by the using class.
    """

    class RstToXyzInfo(Record):
        """Record holding the uploaded array plus any extra keyword data."""

    @memoize_method
    def fake_localop_rst_to_xyz(self):
        """Return an all-ones stand-in for the rst-to-xyz coefficient data.

        The host array has shape ``(channels, d, el_count)``, dtype
        ``given.float_type`` and Fortran ordering, where ``given`` is
        ``self.plan.given``.  It is uploaded with ``gpuarray.to_gpu`` and
        returned in a ``RstToXyzInfo`` together with ``channels``.
        """
        mesh_discr = self.discr
        plan_given = self.plan.given
        dims = mesh_discr.dimensions

        slot_count = plan_given.block_count * plan_given.elements_per_block()
        tex_channels = plan_given.devdata.make_valid_tex_channel_count(dims)

        ones_host = numpy.ones(
                (tex_channels, dims, slot_count),
                dtype=plan_given.float_type, order="F")

        return self.RstToXyzInfo(
                gpu_data=gpuarray.to_gpu(ones_host),
                channels=tex_channels)

    @memoize_method
    def localop_rst_to_xyz(self, diff_op, elgroup):
        """Upload the coefficients of *diff_op* for *elgroup* to the GPU.

        The coefficients from ``diff_op.coefficients(elgroup)`` are gathered
        along their last axis using the element group's microblock indices,
        their first two axes are swapped, and the result is cast to
        ``given.float_type`` (``given`` being ``self.discr.given``).  The
        resulting shape is asserted to be ``(d, d, el_count)``.

        If ``"cuda_diff"`` is in ``discr.debug``, every element of every
        block is cross-checked against the untransposed coefficients.

        Returns a ``RstToXyzInfo`` whose ``gpu_data`` is the Fortran-ordered
        array uploaded with ``gpuarray.to_gpu``.
        """
        mesh_discr = self.discr
        dev_given = mesh_discr.given
        dims = mesh_discr.dimensions

        op_coeffs = diff_op.coefficients(elgroup)
        mb_indices = self.discr.elgroup_microblock_indices(elgroup)
        slot_count = dev_given.block_count * dev_given.elements_per_block()

        # Gather per-slot coefficients, then swap the two leading axes.
        # NOTE(review): the original comment described the layout as
        # "indexed local, el_number, global" -- confirm the axis meaning.
        gathered = op_coeffs[:, :, mb_indices]
        host_matrix = gathered.transpose(1, 0, 2).astype(dev_given.float_type)

        assert host_matrix.shape == (dims, dims, slot_count)

        if "cuda_diff" in mesh_discr.debug:
            for block in mesh_discr.blocks:
                first_slot = block.number * dev_given.elements_per_block()
                block_elements = (
                        member
                        for mb in block.microblocks
                        for member in mb)

                for offset, el in enumerate(block_elements):
                    slot = first_slot + offset

                    owning_group, group_idx = mesh_discr.group_map[el.id]
                    assert owning_group is elgroup

                    assert group_idx == mb_indices[slot]
                    # Undoing the axis swap must give back the source entry.
                    assert (host_matrix[:dims, :, slot].T
                            == op_coeffs[:, :, group_idx]).all()

        return self.RstToXyzInfo(
                gpu_data=gpuarray.to_gpu(
                    numpy.asarray(host_matrix, order="F")))
86
def fake_elwise_scaling(given):
    """Return a GPU vector of ones with one entry per element slot.

    The vector has length ``given.block_count * given.elements_per_block()``
    and dtype ``given.float_type``; it is uploaded with ``gpuarray.to_gpu``.
    """
    slot_count = given.block_count * given.elements_per_block()
    return gpuarray.to_gpu(
            numpy.ones((slot_count,), dtype=given.float_type))
94