1 """Just-in-time compiling backend."""
2
3 from __future__ import division
4
5 __copyright__ = "Copyright (C) 2008 Andreas Kloeckner"
6
7 __license__ = """
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see U{http://www.gnu.org/licenses/}.
20 """
21
22
23
24
25 import hedge.discretization
26 import hedge.optemplate
27 from hedge.backends.exec_common import \
28 CPUExecutorBase, \
29 CPUExecutionMapperBase
30 import numpy
37
39 if self.discr.instrumented:
40 def stats_callback(n, vec_expr):
41 self.discr.vector_math_flop_counter.add(n*insn.flop_count())
42 return self.discr.vector_math_timer
43 else:
44 stats_callback = None
45
46 if insn.flop_count() == 0:
47 return [(name, self(expr))
48 for name, expr in zip(insn.names, insn.exprs)], []
49 else:
50 compiled = insn.compiled(self.executor)
51 return zip(compiled.result_names(),
52 compiled(self, stats_callback)), []
53
55 from hedge.backends.jit.compiler import BoundaryFluxKind
56 is_bdry = isinstance(insn.kind, BoundaryFluxKind)
57
58 from pymbolic.primitives import is_zero
59
60 class ZeroSpec: pass
61 class BoundaryZeros(ZeroSpec): pass
62 class VolumeZeros(ZeroSpec): pass
63
64 def eval_arg(arg_spec):
65 arg_expr, is_int = arg_spec
66 arg = self.rec(arg_expr)
67 if is_zero(arg):
68 if is_bdry and not is_int:
69 return BoundaryZeros()
70 else:
71 return VolumeZeros()
72 else:
73 return arg
74
75 args = [eval_arg(arg_expr)
76 for arg_expr in insn.flux_var_info.arg_specs]
77
78 from pytools import common_dtype
79 max_dtype = common_dtype(
80 [a.dtype for a in args if not isinstance(a, ZeroSpec)],
81 self.discr.default_scalar_type)
82
83 def cast_arg(arg):
84 if isinstance(arg, BoundaryZeros):
85 return self.discr.boundary_zeros(insn.kind.tag,
86 dtype=max_dtype)
87 elif isinstance(arg, VolumeZeros):
88 return self.discr.volume_zeros(
89 dtype=max_dtype)
90 elif isinstance(arg, numpy.ndarray):
91 return numpy.asarray(arg, dtype=max_dtype)
92 else:
93 return arg
94
95 args = [cast_arg(arg) for arg in args]
96
97 if is_bdry:
98 bdry = self.discr.get_boundary(insn.kind.tag)
99 face_groups = bdry.face_groups
100 else:
101 face_groups = self.discr.face_groups
102
103 result = []
104
105 for fg in face_groups:
106
107 module = insn.get_module(self.discr, max_dtype)
108 func = module.gather_flux
109
110
111 arg_struct = module.ArgStruct()
112 for arg_name, arg in zip(insn.flux_var_info.arg_names, args):
113 setattr(arg_struct, arg_name, arg)
114 for arg_num, scalar_arg_expr in enumerate(insn.flux_var_info.scalar_parameters):
115 setattr(arg_struct,
116 "_scalar_arg_%d" % arg_num,
117 self.rec(scalar_arg_expr))
118
119 fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),)
120 all_fluxes_on_faces = [
121 numpy.zeros(fof_shape, dtype=max_dtype)
122 for f in insn.fluxes]
123 for i, fof in enumerate(all_fluxes_on_faces):
124 setattr(arg_struct, "flux%d_on_faces" % i, fof)
125
126 assert not arg_struct.__dict__, arg_struct.__dict__.keys()
127
128
129 func(fg, arg_struct)
130
131
132 for name, flux, fluxes_on_faces in zip(insn.names, insn.fluxes,
133 all_fluxes_on_faces):
134 from hedge.optemplate import LiftingFluxOperator
135
136 out = self.discr.volume_zeros(dtype=fluxes_on_faces.dtype)
137 if isinstance(flux.op, LiftingFluxOperator):
138 self.executor.lift_flux(fg, fg.ldis_loc.lifting_matrix(),
139 fg.local_el_inverse_jacobians, fluxes_on_faces, out)
140 else:
141 self.executor.lift_flux(fg, fg.ldis_loc.multi_face_mass_matrix(),
142 None, fluxes_on_faces, out)
143
144 if self.discr.instrumented:
145 from hedge.tools import lift_flops
146 self.discr.lift_flop_counter.add(lift_flops(fg))
147
148 result.append((name, out))
149
150 if not face_groups:
151
152 for name, flux in zip(insn.names, insn.fluxes):
153 result.append((name, self.discr.volume_zeros()))
154
155 return result, []
156
164
175
176
177
178
179 -class Executor(CPUExecutorBase):
180 - def __init__(self, discr, optemplate, post_bind_mapper):
201
202 def bench_lift(f):
203 if len(discr.face_groups) == 0:
204 return 0
205
206 fg = discr.face_groups[0]
207 out = discr.volume_zeros()
208 from time import time
209
210 xyz_needed = range(discr.dimensions)
211
212 fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),)
213 fof = numpy.zeros(fof_shape, dtype=self.discr.default_scalar_type)
214
215 start = time()
216 f(fg, fg.ldis_loc.lifting_matrix(), fg.local_el_inverse_jacobians, fof, out)
217 return time() - start
218
219 def pick_faster_func(benchmark, choices, attempts=3):
220 from pytools import argmin2
221 return argmin2(
222 (f, min(benchmark(f) for i in range(attempts)))
223 for f in choices)
224
225 from hedge.backends.jit.diff import JitDifferentiator
226 self.diff = pick_faster_func(bench_diff,
227 [self.diff_builtin, JitDifferentiator(discr)])
228 from hedge.backends.jit.lift import JitLifter
229 self.lift_flux = pick_faster_func(bench_lift,
230 [self.lift_flux, JitLifter(discr)])
231
251
259
261 return self.code.execute(
262 self.discr.exec_mapper_class(context, self))
263
264
265
266
267
268
269
270 -class Discretization(hedge.discretization.Discretization):
271 exec_mapper_class = ExecutionMapper
272 executor_class = Executor
273
274 @classmethod
280
281 @classmethod
286
304