1 """Interface with Nvidia CUDA."""
2
3 from __future__ import division
4
5 __copyright__ = "Copyright (C) 2008 Andreas Kloeckner"
6
7 __license__ = """
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see U{http://www.gnu.org/licenses/}.
20 """
21
22
23
24
25 import numpy
26 from pytools import memoize_method, Record
27 import pycuda.gpuarray as gpuarray
35
# NOTE(review): the `def` line for this method is missing from this listing
# (the embedded numbering jumps from 36 to 38), so its name and parameters
# cannot be confirmed from here.
#
# Builds an RstToXyzInfo record whose gpu_data is an all-ones array of shape
# (channels, d, el_count) uploaded to the GPU. Decorated with pytools'
# memoize_method, so the result is presumably cached per instance -- confirm.
36 @memoize_method
38 discr = self.discr
39 given = self.plan.given
40 d = discr.dimensions
41
# Element count: number of blocks times the number of elements per block.
42 el_count = given.block_count * given.elements_per_block()
# Channel count obtained from d via the device-data helper (presumably
# rounded up to a channel count that textures support -- TODO confirm).
43 channels = given.devdata.make_valid_tex_channel_count(d)
44
# Column-major (order="F") ones array of dtype given.float_type, transferred
# with gpuarray.to_gpu; the channel count is stored alongside it.
45 return self.RstToXyzInfo(
46 gpu_data=gpuarray.to_gpu(
47 numpy.ones((channels, d, el_count),
48 dtype=given.float_type, order="F")),
49 channels=channels)
50
# NOTE(review): the `def` line for this method is missing from this listing
# (the embedded numbering jumps from 51 to 53). `diff_op` and `elgroup` are
# used below without a visible binding and are presumably its parameters.
#
# Gathers the coefficients of diff_op for elgroup, reorders them, and returns
# them as an RstToXyzInfo whose gpu_data lives on the GPU. Decorated with
# pytools' memoize_method, so the result is presumably cached -- confirm.
51 @memoize_method
53 discr = self.discr
# NOTE(review): `given` is read from discr.given here, whereas the preceding
# memoized method in this file reads self.plan.given -- confirm both refer to
# the same object.
54 given = discr.given
55 d = discr.dimensions
56
57 coeffs = diff_op.coefficients(elgroup)
58
59 elgroup_indices = self.discr.elgroup_microblock_indices(elgroup)
# Element count: number of blocks times the number of elements per block.
60 el_count = given.block_count * given.elements_per_block()
61
62
# Select entries along the last axis by elgroup_indices, swap the first two
# axes, and cast to given.float_type.
63 result_matrix = (coeffs[:,:,elgroup_indices]
64 .transpose(1,0,2)).astype(given.float_type)
65
# Sanity check: one d-by-d matrix per element slot.
66 assert result_matrix.shape == (d, d, el_count)
67
# Optional consistency check, active only when "cuda_diff" is in discr.debug.
68 if "cuda_diff" in discr.debug:
# Looks up an element's index within its group via discr.group_map and
# asserts that the element belongs to elgroup.
69 def get_el_index_in_el_group(el):
70 mygroup, idx = discr.group_map[el.id]
71 assert mygroup is elgroup
72 return idx
73
# Walk blocks -> microblocks -> elements. For each block, i starts at
# block.number * elements_per_block() and advances by one per element.
74 for block in discr.blocks:
75 i = block.number * given.elements_per_block()
76 for mb in block.microblocks:
77 for el in mb:
78 egi = get_el_index_in_el_group(el)
# Slot i must map to this element, and its transposed slice must equal the
# original coefficients for that element.
79 assert egi == elgroup_indices[i]
80 assert (result_matrix[:d,:,i].T == coeffs[:,:,egi]).all()
81 i += 1
82
# Convert to a column-major array and upload. Unlike the other RstToXyzInfo
# construction in this file, no `channels` field is passed here.
83 return self.RstToXyzInfo(
84 gpu_data=gpuarray.to_gpu(
85 numpy.asarray(result_matrix, order="F")))
86
# NOTE(review): this is the tail of a definition whose header and the binding
# of `given` are missing from this listing (the embedded numbering jumps from
# 86 to 91); its name and parameters cannot be confirmed from here.
#
# The visible lines build a 1-D all-ones array with one entry per element slot
# (block count times elements per block), of dtype given.float_type, and
# return it as a GPU array.
91 el_count = given.block_count * given.elements_per_block()
92 ij = numpy.ones((el_count,), dtype=given.float_type)
93 return gpuarray.to_gpu(ij)
94