| Home | Trees | Indices | Help | 
|---|
|  | 
  1  """Just-in-time compiling backend.""" 
  2   
  3  from __future__ import division 
  4   
  5  __copyright__ = "Copyright (C) 2008 Andreas Kloeckner" 
  6   
  7  __license__ = """ 
  8  This program is free software: you can redistribute it and/or modify 
  9  it under the terms of the GNU General Public License as published by 
 10  the Free Software Foundation, either version 3 of the License, or 
 11  (at your option) any later version. 
 12   
 13  This program is distributed in the hope that it will be useful, 
 14  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  GNU General Public License for more details. 
 17   
 18  You should have received a copy of the GNU General Public License 
 19  along with this program.  If not, see U{http://www.gnu.org/licenses/}. 
 20  """ 
 21   
 22   
 23   
 24   
 25  import hedge.discretization 
 26  import hedge.optemplate 
 27  from hedge.backends.exec_common import \ 
 28          CPUExecutorBase, \ 
 29          CPUExecutionMapperBase 
 30  import numpy 
 31   
 32   
 33   
 34   
 35  # exec mapper ----------------------------------------------------------------- 
 36 -class ExecutionMapper(CPUExecutionMapperBase): 
 37      # code execution functions ------------------------------------------------ 
 39          if self.discr.instrumented: 
 40              def stats_callback(n, vec_expr): 
 41                  self.discr.vector_math_flop_counter.add(n*insn.flop_count()) 
 42                  return self.discr.vector_math_timer 
 43          else: 
 44              stats_callback = None 
 45   
 46          if insn.flop_count() == 0: 
 47              return [(name, self(expr)) 
 48                  for name, expr in zip(insn.names, insn.exprs)], [] 
 49          else: 
 50              compiled = insn.compiled(self.executor) 
 51              return zip(compiled.result_names(), 
 52                      compiled(self, stats_callback)), [] 
 53   
 55          from hedge.backends.jit.compiler import BoundaryFluxKind 
 56          is_bdry = isinstance(insn.kind, BoundaryFluxKind) 
 57   
 58          from pymbolic.primitives import is_zero 
 59   
 60          class ZeroSpec: pass 
 61          class BoundaryZeros(ZeroSpec): pass 
 62          class VolumeZeros(ZeroSpec): pass 
 63   
 64          def eval_arg(arg_spec): 
 65              arg_expr, is_int = arg_spec 
 66              arg = self.rec(arg_expr) 
 67              if is_zero(arg): 
 68                  if is_bdry and not is_int: 
 69                      return BoundaryZeros() 
 70                  else: 
 71                      return VolumeZeros() 
 72              else: 
 73                  return arg 
 74   
 75          args = [eval_arg(arg_expr)  
 76                  for arg_expr in insn.flux_var_info.arg_specs] 
 77   
 78          from pytools import common_dtype 
 79          max_dtype = common_dtype( 
 80                  [a.dtype for a in args if not isinstance(a, ZeroSpec)], 
 81                  self.discr.default_scalar_type) 
 82   
 83          def cast_arg(arg): 
 84              if isinstance(arg, BoundaryZeros): 
 85                  return self.discr.boundary_zeros(insn.kind.tag, 
 86                          dtype=max_dtype) 
 87              elif isinstance(arg, VolumeZeros): 
 88                  return self.discr.volume_zeros( 
 89                          dtype=max_dtype) 
 90              elif isinstance(arg, numpy.ndarray): 
 91                  return numpy.asarray(arg, dtype=max_dtype) 
 92              else: 
 93                  return arg 
 94   
 95          args = [cast_arg(arg) for arg in args] 
 96   
 97          if is_bdry: 
 98              bdry = self.discr.get_boundary(insn.kind.tag) 
 99              face_groups = bdry.face_groups 
100          else: 
101              face_groups = self.discr.face_groups 
102   
103          result = [] 
104   
105          for fg in face_groups: 
106              # grab module 
107              module = insn.get_module(self.discr, max_dtype) 
108              func = module.gather_flux 
109   
110              # set up argument structure 
111              arg_struct = module.ArgStruct() 
112              for arg_name, arg in zip(insn.flux_var_info.arg_names, args): 
113                  setattr(arg_struct, arg_name, arg) 
114              for arg_num, scalar_arg_expr in enumerate(insn.flux_var_info.scalar_parameters): 
115                  setattr(arg_struct,  
116                          "_scalar_arg_%d" % arg_num,  
117                          self.rec(scalar_arg_expr)) 
118   
119              fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),) 
120              all_fluxes_on_faces = [ 
121                      numpy.zeros(fof_shape, dtype=max_dtype) 
122                      for f in insn.fluxes] 
123              for i, fof in enumerate(all_fluxes_on_faces): 
124                  setattr(arg_struct, "flux%d_on_faces" % i, fof) 
125   
126              assert not arg_struct.__dict__, arg_struct.__dict__.keys() 
127   
128              # perform gather 
129              func(fg, arg_struct) 
130   
131              # do lift, produce output 
132              for name, flux, fluxes_on_faces in zip(insn.names, insn.fluxes, 
133                      all_fluxes_on_faces): 
134                  from hedge.optemplate import LiftingFluxOperator 
135   
136                  out = self.discr.volume_zeros(dtype=fluxes_on_faces.dtype) 
137                  if isinstance(flux.op, LiftingFluxOperator): 
138                      self.executor.lift_flux(fg, fg.ldis_loc.lifting_matrix(), 
139                              fg.local_el_inverse_jacobians, fluxes_on_faces, out) 
140                  else: 
141                      self.executor.lift_flux(fg, fg.ldis_loc.multi_face_mass_matrix(), 
142                              None, fluxes_on_faces, out) 
143   
144                  if self.discr.instrumented: 
145                      from hedge.tools import lift_flops 
146                      self.discr.lift_flop_counter.add(lift_flops(fg)) 
147   
148                  result.append((name, out)) 
149   
150          if not face_groups: 
151              # No face groups? Still assign context variables. 
152              for name, flux in zip(insn.names, insn.fluxes): 
153                  result.append((name, self.discr.volume_zeros())) 
154   
155          return result, [] 
156   
158          xyz_diff = self.executor.diff(insn.op_class, self.rec(insn.field), 
159                  xyz_needed=[op.xyz_axis for op in insn.operators]) 
160   
161          return [(name, diff) 
162                  for name, op, diff in zip( 
163                      insn.names, insn.operators, xyz_diff)], [] 
164   
166          field = self.rec(insn.field) 
167   
168          if isinstance(field, (float, int)) and field == 0: 
169              return [(insn.name, 0)], [] 
170   
171          out = self.discr.volume_zeros(dtype=field.dtype) 
172          self.executor.do_mass(insn.op_class, field, out) 
173   
174          return [(insn.name, out)], [] 
175   
181          self.discr = discr 
182   
183          self.code = self.compile_optemplate(discr, optemplate, post_bind_mapper) 
184   
185          if "print_op_code" in discr.debug: 
186              from hedge.tools import get_rank 
187              if get_rank(discr) == 0: 
188                  print self.code 
189                  raw_input() 
190   
191          def bench_diff(f): 
192              test_field = discr.volume_zeros() 
193              from hedge.optemplate import DifferentiationOperator 
194              from time import time 
195   
196              xyz_needed = range(discr.dimensions) 
197   
198              start = time() 
199              f(DifferentiationOperator, test_field, xyz_needed) 
200              return time() - start 
201   
202          def bench_lift(f): 
203              if len(discr.face_groups) == 0: 
204                  return 0 
205   
206              fg = discr.face_groups[0] 
207              out = discr.volume_zeros() 
208              from time import time 
209   
210              xyz_needed = range(discr.dimensions) 
211   
212              fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),) 
213              fof = numpy.zeros(fof_shape, dtype=self.discr.default_scalar_type) 
214   
215              start = time() 
216              f(fg, fg.ldis_loc.lifting_matrix(), fg.local_el_inverse_jacobians, fof, out) 
217              return time() - start 
218   
219          def pick_faster_func(benchmark, choices, attempts=3): 
220              from pytools import argmin2 
221              return argmin2( 
222                      (f, min(benchmark(f) for i in range(attempts))) 
223                      for f in choices) 
224   
225          from hedge.backends.jit.diff import JitDifferentiator 
226          self.diff = pick_faster_func(bench_diff, 
227                  [self.diff_builtin, JitDifferentiator(discr)]) 
228          from hedge.backends.jit.lift import JitLifter 
229          self.lift_flux = pick_faster_func(bench_lift, 
230                  [self.lift_flux, JitLifter(discr)]) 
231   
233          from hedge.optemplate import \ 
234                  OperatorBinder, \ 
235                  InverseMassContractor, \ 
236                  BCToFluxRewriter, \ 
237                  EmptyFluxKiller 
238   
239          from hedge.optemplate import CommutativeConstantFoldingMapper 
240   
241          prepared_optemplate = ( 
242                  InverseMassContractor()( 
243                      EmptyFluxKiller(discr)( 
244                          CommutativeConstantFoldingMapper()( 
245                              post_bind_mapper( 
246                                  BCToFluxRewriter()( 
247                                      OperatorBinder()( 
248                                          optemplate))))))) 
249          from hedge.backends.jit.compiler import OperatorCompiler 
250          return OperatorCompiler(discr)(prepared_optemplate) 
251   
253          rst_derivatives = [ 
254                  self.diff_rst(op_class, i, field) 
255                  for i in range(self.discr.dimensions)] 
256   
257          return [self.diff_rst_to_xyz(op_class(i), rst_derivatives) 
258                  for i in xyz_needed] 
259   
263   
264   
265   
266   
267   
268   
269  # discretization -------------------------------------------------------------- 
270 -class Discretization(hedge.discretization.Discretization): 
271      exec_mapper_class = ExecutionMapper 
272      executor_class = Executor 
273   
274      @classmethod 
276          return hedge.discretization.Discretization.all_debug_flags() | set([ 
277              "jit_dont_optimize_large_exprs", 
278              "jit_wait_on_compile_error", 
279              ]) 
280   
281      @classmethod 
283          return hedge.discretization.Discretization.noninteractive_debug_flags() | set([ 
284              "jit_dont_optimize_large_exprs", 
285              ]) 
286   
288          toolchain = kwargs.pop("toolchain", None) 
289   
290          # tolerate (and ignore) the CUDA backend's tune_for argument 
291          _ = kwargs.pop("tune_for", None) 
292   
293          hedge.discretization.Discretization.__init__(self, *args, **kwargs) 
294   
295          if toolchain is None: 
296              from codepy.jit import guess_toolchain 
297              toolchain = guess_toolchain() 
298              toolchain = toolchain.with_optimization_level(3) 
299   
300          from codepy.libraries import add_hedge 
301          add_hedge(toolchain) 
302   
303          self.toolchain = toolchain 
304   
| Home | Trees | Indices | Help | 
|---|
| Generated by Epydoc 3.0.1 on Sat Aug 29 14:33:52 2009 | http://epydoc.sourceforge.net |