hedge.backends.jit

1 """Just-in-time compiling backend.""" 2 3 from __future__ import division 4 5 __copyright__ = "Copyright (C) 2008 Andreas Kloeckner" 6 7 __license__ = """ 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see U{http://www.gnu.org/licenses/}. 20 """ 21 22 23 24 25 import hedge.discretization 26 import hedge.optemplate 27 from hedge.backends.exec_common import \ 28 CPUExecutorBase, \ 29 CPUExecutionMapperBase 30 import numpy

31 32 33 34 35 # exec mapper ----------------------------------------------------------------- 36 -class ExecutionMapper(CPUExecutionMapperBase):

37 # code execution functions ------------------------------------------------

38 - def exec_vector_expr_assign(self, insn):

39 if self.discr.instrumented: 40 def stats_callback(n, vec_expr): 41 self.discr.vector_math_flop_counter.add(n*insn.flop_count()) 42 return self.discr.vector_math_timer

43 else: 44 stats_callback = None 45 46 if insn.flop_count() == 0: 47 return [(name, self(expr)) 48 for name, expr in zip(insn.names, insn.exprs)], [] 49 else: 50 compiled = insn.compiled(self.executor) 51 return zip(compiled.result_names(), 52 compiled(self, stats_callback)), []

53

54 - def exec_flux_batch_assign(self, insn):

55 from hedge.backends.jit.compiler import BoundaryFluxKind 56 is_bdry = isinstance(insn.kind, BoundaryFluxKind) 57 58 from pymbolic.primitives import is_zero 59 60 class ZeroSpec: pass 61 class BoundaryZeros(ZeroSpec): pass 62 class VolumeZeros(ZeroSpec): pass 63 64 def eval_arg(arg_spec): 65 arg_expr, is_int = arg_spec 66 arg = self.rec(arg_expr) 67 if is_zero(arg): 68 if is_bdry and not is_int: 69 return BoundaryZeros() 70 else: 71 return VolumeZeros() 72 else: 73 return arg

74 75 args = [eval_arg(arg_expr) 76 for arg_expr in insn.flux_var_info.arg_specs] 77 78 from pytools import common_dtype 79 max_dtype = common_dtype( 80 [a.dtype for a in args if not isinstance(a, ZeroSpec)], 81 self.discr.default_scalar_type) 82 83 def cast_arg(arg): 84 if isinstance(arg, BoundaryZeros): 85 return self.discr.boundary_zeros(insn.kind.tag, 86 dtype=max_dtype) 87 elif isinstance(arg, VolumeZeros): 88 return self.discr.volume_zeros( 89 dtype=max_dtype) 90 elif isinstance(arg, numpy.ndarray): 91 return numpy.asarray(arg, dtype=max_dtype) 92 else: 93 return arg 94 95 args = [cast_arg(arg) for arg in args] 96 97 if is_bdry: 98 bdry = self.discr.get_boundary(insn.kind.tag) 99 face_groups = bdry.face_groups 100 else: 101 face_groups = self.discr.face_groups 102 103 result = [] 104 105 for fg in face_groups: 106 # grab module 107 module = insn.get_module(self.discr, max_dtype) 108 func = module.gather_flux 109 110 # set up argument structure 111 arg_struct = module.ArgStruct() 112 for arg_name, arg in zip(insn.flux_var_info.arg_names, args): 113 setattr(arg_struct, arg_name, arg) 114 for arg_num, scalar_arg_expr in enumerate(insn.flux_var_info.scalar_parameters): 115 setattr(arg_struct, 116 "_scalar_arg_%d" % arg_num, 117 self.rec(scalar_arg_expr)) 118 119 fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),) 120 all_fluxes_on_faces = [ 121 numpy.zeros(fof_shape, dtype=max_dtype) 122 for f in insn.fluxes] 123 for i, fof in enumerate(all_fluxes_on_faces): 124 setattr(arg_struct, "flux%d_on_faces" % i, fof) 125 126 assert not arg_struct.__dict__, arg_struct.__dict__.keys() 127 128 # perform gather 129 func(fg, arg_struct) 130 131 # do lift, produce output 132 for name, flux, fluxes_on_faces in zip(insn.names, insn.fluxes, 133 all_fluxes_on_faces): 134 from hedge.optemplate import LiftingFluxOperator 135 136 out = self.discr.volume_zeros(dtype=fluxes_on_faces.dtype) 137 if isinstance(flux.op, LiftingFluxOperator): 138 self.executor.lift_flux(fg, fg.ldis_loc.lifting_matrix(), 139 fg.local_el_inverse_jacobians, fluxes_on_faces, out) 140 else: 141 self.executor.lift_flux(fg, fg.ldis_loc.multi_face_mass_matrix(), 142 None, fluxes_on_faces, out) 143 144 if self.discr.instrumented: 145 from hedge.tools import lift_flops 146 self.discr.lift_flop_counter.add(lift_flops(fg)) 147 148 result.append((name, out)) 149 150 if not face_groups: 151 # No face groups? Still assign context variables. 152 for name, flux in zip(insn.names, insn.fluxes): 153 result.append((name, self.discr.volume_zeros())) 154 155 return result, [] 156

157 - def exec_diff_batch_assign(self, insn):

158 xyz_diff = self.executor.diff(insn.op_class, self.rec(insn.field), 159 xyz_needed=[op.xyz_axis for op in insn.operators]) 160 161 return [(name, diff) 162 for name, op, diff in zip( 163 insn.names, insn.operators, xyz_diff)], []

164

165 - def exec_mass_assign(self, insn):

166 field = self.rec(insn.field) 167 168 if isinstance(field, (float, int)) and field == 0: 169 return [(insn.name, 0)], [] 170 171 out = self.discr.volume_zeros(dtype=field.dtype) 172 self.executor.do_mass(insn.op_class, field, out) 173 174 return [(insn.name, out)], []

175

176 177 178 179 -class Executor(CPUExecutorBase):

180 - def __init__(self, discr, optemplate, post_bind_mapper):

181 self.discr = discr 182 183 self.code = self.compile_optemplate(discr, optemplate, post_bind_mapper) 184 185 if "print_op_code" in discr.debug: 186 from hedge.tools import get_rank 187 if get_rank(discr) == 0: 188 print self.code 189 raw_input() 190 191 def bench_diff(f): 192 test_field = discr.volume_zeros() 193 from hedge.optemplate import DifferentiationOperator 194 from time import time 195 196 xyz_needed = range(discr.dimensions) 197 198 start = time() 199 f(DifferentiationOperator, test_field, xyz_needed) 200 return time() - start

201 202 def bench_lift(f): 203 if len(discr.face_groups) == 0: 204 return 0 205 206 fg = discr.face_groups[0] 207 out = discr.volume_zeros() 208 from time import time 209 210 xyz_needed = range(discr.dimensions) 211 212 fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),) 213 fof = numpy.zeros(fof_shape, dtype=self.discr.default_scalar_type) 214 215 start = time() 216 f(fg, fg.ldis_loc.lifting_matrix(), fg.local_el_inverse_jacobians, fof, out) 217 return time() - start

218 219 def pick_faster_func(benchmark, choices, attempts=3): 220 from pytools import argmin2 221 return argmin2( 222 (f, min(benchmark(f) for i in range(attempts))) 223 for f in choices) 224 225 from hedge.backends.jit.diff import JitDifferentiator 226 self.diff = pick_faster_func(bench_diff, 227 [self.diff_builtin, JitDifferentiator(discr)]) 228 from hedge.backends.jit.lift import JitLifter 229 self.lift_flux = pick_faster_func(bench_lift, 230 [self.lift_flux, JitLifter(discr)]) 231

232 - def compile_optemplate(self, discr, optemplate, post_bind_mapper):

233 from hedge.optemplate import \ 234 OperatorBinder, \ 235 InverseMassContractor, \ 236 BCToFluxRewriter, \ 237 EmptyFluxKiller 238 239 from hedge.optemplate import CommutativeConstantFoldingMapper 240 241 prepared_optemplate = ( 242 InverseMassContractor()( 243 EmptyFluxKiller(discr)( 244 CommutativeConstantFoldingMapper()( 245 post_bind_mapper( 246 BCToFluxRewriter()( 247 OperatorBinder()( 248 optemplate))))))) 249 from hedge.backends.jit.compiler import OperatorCompiler 250 return OperatorCompiler(discr)(prepared_optemplate)

251

252 - def diff_builtin(self, op_class, field, xyz_needed):

253 rst_derivatives = [ 254 self.diff_rst(op_class, i, field) 255 for i in range(self.discr.dimensions)] 256 257 return [self.diff_rst_to_xyz(op_class(i), rst_derivatives) 258 for i in xyz_needed]

259

260 - def __call__(self, **context):

261 return self.code.execute( 262 self.discr.exec_mapper_class(context, self))

263

264 265 266 267 268 269 # discretization -------------------------------------------------------------- 270 -class Discretization(hedge.discretization.Discretization):

271 exec_mapper_class = ExecutionMapper 272 executor_class = Executor 273 274 @classmethod

275 - def all_debug_flags(cls):

276 return hedge.discretization.Discretization.all_debug_flags() | set([ 277 "jit_dont_optimize_large_exprs", 278 "jit_wait_on_compile_error", 279 ])

280 281 @classmethod

282 - def noninteractive_debug_flags(cls):

283 return hedge.discretization.Discretization.noninteractive_debug_flags() | set([ 284 "jit_dont_optimize_large_exprs", 285 ])

286

287 - def __init__(self, *args, **kwargs):

288 toolchain = kwargs.pop("toolchain", None) 289 290 # tolerate (and ignore) the CUDA backend's tune_for argument 291 _ = kwargs.pop("tune_for", None) 292 293 hedge.discretization.Discretization.__init__(self, *args, **kwargs) 294 295 if toolchain is None: 296 from codepy.jit import guess_toolchain 297 toolchain = guess_toolchain() 298 toolchain = toolchain.with_optimization_level(3) 299 300 from codepy.libraries import add_hedge 301 add_hedge(toolchain) 302 303 self.toolchain = toolchain

304

Source Code for Package hedge.backends.jit