1  """Compiler to turn operator expression tree into (imperative) bytecode.""" 
  2   
  3  from __future__ import division 
  4   
  5  __copyright__ = "Copyright (C) 2008 Andreas Kloeckner" 
  6   
  7  __license__ = """ 
  8  Permission is hereby granted, free of charge, to any person 
  9  obtaining a copy of this software and associated documentation 
 10  files (the "Software"), to deal in the Software without 
 11  restriction, including without limitation the rights to use, 
 12  copy, modify, merge, publish, distribute, sublicense, and/or sell 
 13  copies of the Software, and to permit persons to whom the 
 14  Software is furnished to do so, subject to the following 
 15  conditions: 
 16   
 17  The above copyright notice and this permission notice shall be 
 18  included in all copies or substantial portions of the Software. 
 19   
 20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 21  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 22  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 23  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 24  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
 25  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 26  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
 27  OTHER DEALINGS IN THE SOFTWARE. 
 28  """ 
 29   
 30   
 31   
 32   
 33  from pytools import Record, memoize_method 
 34  from hedge.optemplate import IdentityMapper 
    # Only the dependency-mapper factory is stored per instance.
    __slots__ = ["dep_mapper_factory"]
    # Scheduling weight; the scheduler picks the available instruction with
    # the highest priority (subclasses may override, cf. priority = 1 below).
    priority = 0

        # Abstract: subclasses return the set of names this instruction writes.
        raise NotImplementedError("no get_assignees in %s" % self.__class__)

        # Abstract: subclasses return the expressions this instruction reads.
        raise NotImplementedError("no get_dependencies in %s" % self.__class__)

        # Abstract in this base class.
        raise NotImplementedError

        # Abstract in this base class.
        raise NotImplementedError
   55   
 57       
 58       
 59       
 60       
 61       
 62   
 63      comment = "" 
 64   
 65 -    def __init__(self, names, exprs, **kwargs): 
  70   
 71      @memoize_method 
 75   
 77          return set(self.names) 
  78   
 80          try: 
 81              if each_vector: 
 82                  raise AttributeError 
 83              else: 
 84                  return self._dependencies 
 85          except: 
 86               
 87              dep_mapper = self.dep_mapper_factory(each_vector)  
 88   
 89              from operator import or_ 
 90              deps = reduce( 
 91                      or_, (dep_mapper(expr)  
 92                      for expr in self.exprs)) 
 93   
 94              from pymbolic.primitives import Variable 
 95              deps -= set(Variable(name) for name in self.names) 
 96   
 97              if not each_vector: 
 98                  self._dependencies = deps 
 99   
100              return deps 
 101   
124   
 127   
    # One assigned name per flux expression; `kind` tags the flux batch.
    __slots__ = ["names", "fluxes", "kind"]

        # All names written by this batch assignment.
        return set(self.names)

        # Render the batch as a brace-delimited listing, one
        # "name <- flux" line per assignment, tagged with the batch kind.
        lines = []
        lines.append("{ /* %s */" % self.kind)
        for n, f in zip(self.names, self.fluxes):
            lines.append("  %s <- %s" % (n, f))
        lines.append("}")
        return "\n".join(lines)
 141   
 144   
146       
147   
149          return set(self.names) 
 150   
151      @memoize_method 
154   
        # One "name <- operator * field" line per differentiation operator;
        # multiple assignments are grouped in braces for readability.
        lines = []

        if len(self.names) > 1:
            lines.append("{")
            for n, d in zip(self.names, self.operators):
                lines.append("  %s <- %s * %s" % (n, d, self.field))
            lines.append("}")
        else:
            for n, d in zip(self.names, self.operators):
                lines.append("%s <- %s * %s" % (n, d, self.field))

        return "\n".join(lines)
 168   
 171   
173      __slots__ = ["name", "op_class", "field"] 
174   
176          return set([self.name]) 
 177   
180   
182          return "%s <- %s * %s" % ( 
183                  self.name, 
184                  str(self.op_class()), 
185                  self.field) 
 186   
 189   
192      __slots__ = ["names", "indices_and_ranks", "rank_to_index_and_name", "field"] 
193      priority = 1 
194   
195 -    def __init__(self, names, indices_and_ranks, field, dep_mapper_factory): 
 196          rank_to_index_and_name = {} 
197          for name, (index, rank) in zip( 
198                  names, indices_and_ranks): 
199              rank_to_index_and_name.setdefault(rank, []).append( 
200                  (index, name)) 
201   
202          Instruction.__init__(self, 
203                  names=names, 
204                  indices_and_ranks=indices_and_ranks, 
205                  rank_to_index_and_name=rank_to_index_and_name, 
206                  field=field, 
207                  dep_mapper_factory=dep_mapper_factory) 
 208   
210          return set(self.names) 
 211   
214   
225   
227          return executor.exec_flux_exchange_batch_assign 
  228   
234      origins = {} 
235      node_names = {} 
236   
237      result = [ 
238              "initial [label=\"initial\"]" 
239              "result [label=\"result\"]" 
240              ] 
241   
242      for num, insn in enumerate(code.instructions): 
243          node_name = "node%d" % num 
244          node_names[insn] = node_name 
245          node_label = str(insn).replace("\n", "\\l")+"\\l" 
246   
247          if max_node_label_length is not None: 
248              node_label = node_label[:max_node_label_length] 
249   
250          result.append("%s [ label=\"p%d: %s\" shape=box ];" % ( 
251              node_name, insn.priority, node_label)) 
252   
253          for assignee in insn.get_assignees(): 
254              origins[assignee] = node_name 
255   
256      def get_orig_node(expr): 
257          from pymbolic.primitives import Variable 
258          if isinstance(expr, Variable): 
259              return origins.get(expr.name, "initial") 
260          else: 
261              return "initial" 
 262   
263      def gen_expr_arrow(expr, target_node): 
264          result.append("%s -> %s [label=\"%s\"];" 
265                  % (get_orig_node(expr), target_node, expr)) 
266   
267      for insn in code.instructions: 
268          for dep in insn.get_dependencies(): 
269              gen_expr_arrow(dep, node_names[insn]) 
270   
271      from hedge.tools import is_obj_array 
272   
273      if is_obj_array(code.result): 
274          for subexp in code.result: 
275              gen_expr_arrow(subexp, "result") 
276      else: 
277          gen_expr_arrow(code.result, "result") 
278   
279      return "digraph dataflow {\n%s\n}\n" % "\n".join(result) 
280   
281   
282   
283   
284   
285   
class Code(object):
    """Compiled form of an operator: a list of instructions plus the
    result expression, evaluated in terms of the instructions' assignments.
    """

    def __init__(self, instructions, result):
        # instructions: list of Instruction subclass instances to execute.
        # result: expression (or object array of expressions) giving the
        # final value in terms of the names the instructions assign.
        self.instructions = instructions
        self.result = result
 290   
305   
306   
309   
310      @memoize_method 
        # Instructions that are not yet done and whose every dependency
        # is already available in the execution context.
        from pytools import all, argmax2
        available_insns = [
                (insn, insn.priority) for insn in self.instructions
                if insn not in done_insns
                and all(dep.name in available_names
                    for dep in insn.get_dependencies())]

        if not available_insns:
            raise self.NoInstructionAvailable

        # Variables that no still-pending instruction will read again
        # may be discarded from the context to free memory.
        from pytools import flatten
        discardable_vars = set(available_names) - set(flatten(
            [dep.name for dep in insn.get_dependencies()]
            for insn in self.instructions
            if insn not in done_insns ))

        # ...but never discard anything that feeds the final result.
        from hedge.tools import with_object_array_or_scalar
        with_object_array_or_scalar(
                lambda var: discardable_vars.discard(var.name),
                self.result)

        # argmax2 picks the available instruction with the highest priority.
        return argmax2(available_insns), discardable_vars
 334   
        # Human-readable dump: every instruction in order, then the
        # final result expression.
        lines = []
        for insn in self.instructions:
            lines.extend(str(insn).split("\n"))
        lines.append(str(self.result))

        return "\n".join(lines)
 342   
        context = exec_mapper.context

        # Futures represent asynchronously-completing work (e.g. pending
        # communication); done_insns tracks which instructions have run.
        futures = []
        done_insns = set()

        quit_flag = False
        force_future = False
        while not quit_flag:
            # Check futures for completion; force_future makes us block on
            # one even if it is not ready yet (nothing else can proceed).
            i = 0
            while i < len(futures):
                future = futures[i]
                if force_future or future.is_ready():
                    assignments, new_futures = future()
                    for target, value in assignments:
                        context[target] = value
                    futures.extend(new_futures)
                    futures.pop(i)
                    force_future = False
                else:
                    i += 1

                del future

            # Pick the next runnable instruction.
            try:
                insn, discardable_vars = self.get_next_step(
                        frozenset(context.keys()),
                        frozenset(done_insns))
            except self.NoInstructionAvailable:
                if futures:
                    # Stalled on pending futures: wait for one to complete.
                    force_future = True
                else:
                    # No futures and no runnable instructions: we're done.
                    quit_flag = True
            else:
                # Free variables no remaining instruction will read.
                for name in discardable_vars:
                    del context[name]

                done_insns.add(insn)
                assignments, new_futures = \
                        insn.get_executor_method(exec_mapper)(insn)
                for target, value in assignments:
                    context[target] = value

                futures.extend(new_futures)

        if len(done_insns) < len(self.instructions):
            print "Unreachable instructions:"
            for insn in set(self.instructions) - done_insns:
                print "    ", insn

            raise RuntimeError("not all instructions are reachable"
                    "--did you forget to pass a value for a placeholder?")

        # Evaluate the result expression in the final context.
        from hedge.tools import with_object_array_or_scalar
        return with_object_array_or_scalar(exec_mapper, self.result)
 408      from hedge.optemplate import BoundOperatorCollector \ 
409              as bound_op_collector_class 
410   
412          __slots__ = ["flux_expr", "dependencies", "kind"] 
 413   
415          __slots__ = ["flux_exprs", "kind"] 
 416   
417 -    def __init__(self, prefix="_expr", max_vectors_in_batch_expr=None): 
 418          IdentityMapper.__init__(self) 
419          self.prefix = prefix 
420   
421          self.max_vectors_in_batch_expr = max_vectors_in_batch_expr 
422   
423          self.code = [] 
424          self.assigned_var_count = 0 
425          self.expr_to_var = {} 
 426   
427      @memoize_method 
436   
438          """Recursively enumerate all flux expressions in the expression tree 
439          `expr`. The returned list consists of `ExecutionPlanner.FluxRecord` 
440          instances with fields `flux_expr` and `dependencies`. 
441          """ 
442   
443           
444          raise NotImplementedError 
 445   
449   
453   
514   
        # Generate a fresh, unique variable name from the compiler prefix
        # and a running counter.
        new_name = self.prefix+str(self.assigned_var_count)
        self.assigned_var_count += 1
        return new_name
 519   
529   
549   
575   
591   
621   
646   
656   
657       
662   
665   
666       
668          from pymbolic.primitives import Variable 
669   
670           
671          def get_complete_origins_set(insn, skip_levels=0): 
672              if skip_levels < 0: 
673                  skip_levels = 0 
674   
675              result = set() 
676              for dep in insn.get_dependencies(): 
677                  if isinstance(dep, Variable): 
678                      dep_origin = origins_map.get(dep.name, None) 
679                      if dep_origin is not None: 
680                          if skip_levels <= 0: 
681                              result.add(dep_origin) 
682                          result |= get_complete_origins_set(dep_origin, skip_levels-1) 
683   
684              return result 
 685   
686          var_assignees_cache = {} 
687          def get_var_assignees(insn): 
688              try: 
689                  return var_assignees_cache[insn] 
690              except KeyError: 
691                  result = set(Variable(assignee)  
692                          for assignee in insn.get_assignees()) 
693                  var_assignees_cache[insn] = result 
694                  return result 
 695   
        def aggregate_two_assignments(ass_1, ass_2):
            # Merge two Assign instructions into one; the merged dependency
            # set excludes anything assigned within the merged pair itself.
            names = ass_1.names+ass_2.names

            from pymbolic.primitives import Variable
            deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                    - set(Variable(name) for name in names)

            # The merged instruction inherits the higher of the two
            # priorities so scheduling order is preserved.
            return Assign(
                    names=names, exprs=ass_1.exprs+ass_2.exprs,
                    _dependencies=deps,
                    dep_mapper_factory=self.dep_mapper_factory,
                    priority=max(ass_1.priority, ass_2.priority))
709           
        # Map each assigned name to the instruction producing it.
        origins_map = dict(
                    (assignee, insn)
                    for insn in instructions
                    for assignee in insn.get_assignees())

        from pytools import partition
        unprocessed_assigns, other_insns = partition(
                lambda insn: isinstance(insn, Assign),
                instructions)

        # Zero-flop assignments are never worth aggregating; accept them
        # as-is.
        processed_assigns, unprocessed_assigns = partition(
                lambda ass: ass.flop_count() == 0,
                unprocessed_assigns)

        # Greedily merge assignments that share dependencies or feed one
        # another, as long as no dependency cycle would result.
        while unprocessed_assigns:
            my_assign = unprocessed_assigns.pop()

            my_deps = my_assign.get_dependencies()
            my_assignees = get_var_assignees(my_assign)

            # Candidates: same priority and some data relationship
            # (shared input, or one reads what the other writes).
            agg_candidates = []
            for i, other_assign in enumerate(unprocessed_assigns):
                other_deps = other_assign.get_dependencies()
                other_assignees = get_var_assignees(other_assign)

                if ((my_deps & other_deps
                        or my_deps & other_assignees
                        or other_deps & my_assignees)
                        and my_assign.priority == other_assign.priority):
                    agg_candidates.append((i, other_assign))

            did_work = False

            if agg_candidates:
                my_indirect_origins = get_complete_origins_set(
                        my_assign, skip_levels=1)

                for other_assign_index, other_assign in agg_candidates:
                    # Respect the configured batch-size limit, counting
                    # distinct assignees plus per-vector dependencies.
                    if self.max_vectors_in_batch_expr is not None:
                        new_assignee_count = len(
                                set(my_assign.get_assignees())
                                | set(other_assign.get_assignees()))
                        new_dep_count = len(
                                my_assign.get_dependencies(each_vector=True)
                                | other_assign.get_dependencies(each_vector=True))

                        if (new_assignee_count + new_dep_count \
                                > self.max_vectors_in_batch_expr):
                            continue

                    other_indirect_origins = get_complete_origins_set(
                            other_assign, skip_levels=1)

                    # Only merge if neither depends on the other through
                    # a third instruction (which would create a cycle).
                    if (my_assign not in other_indirect_origins and
                            other_assign not in my_indirect_origins):
                        did_work = True

                        # Replace the pair by its merger and requeue it so
                        # it may be merged further.
                        new_assignment = aggregate_two_assignments(my_assign, other_assign)
                        del unprocessed_assigns[other_assign_index]
                        unprocessed_assigns.append(new_assignment)
                        for assignee in new_assignment.get_assignees():
                            origins_map[assignee] = new_assignment

                        break

            if not did_work:
                processed_assigns.append(my_assign)

        # Names read by any surviving instruction; used below to decide
        # which intermediate values must still be returned.
        externally_used_names = set(
                expr
                for insn in processed_assigns+other_insns
                for expr in insn.get_dependencies())

        from hedge.tools import is_obj_array
        if is_obj_array(result):
            externally_used_names |= set(expr for expr in result)
        else:
            externally_used_names |= set([result])
791   
        def schedule_and_finalize_assignment(ass):
            # Topologically order the merged sub-assignments so that
            # intra-batch dependencies are satisfied, then hand the batch
            # to finalize_multi_assign.
            dep_mapper = self.dep_mapper_factory()

            # NOTE(review): relies on zip() returning a list (Python 2) --
            # names_exprs is iterated twice below.
            names_exprs = zip(ass.names, ass.exprs)

            # For each sub-assignment, record which *other members of this
            # batch* it depends on; external deps are already satisfied.
            my_assignees = set(name for name, expr in names_exprs)
            names_exprs_deps = [
                    (name, expr,
                        set(dep.name for dep in dep_mapper(expr) if
                            isinstance(dep, Variable)) & my_assignees)
                    for name, expr in names_exprs]

            ordered_names_exprs = []
            available_names = set()

            while names_exprs_deps:
                schedulable = []

                i = 0
                while i < len(names_exprs_deps):
                    name, expr, deps = names_exprs_deps[i]

                    unsatisfied_deps = deps - available_names

                    if not unsatisfied_deps:
                        schedulable.append((str(expr), name, expr))
                        del names_exprs_deps[i]
                    else:
                        i += 1

                # Sort by stringified expression for deterministic output.
                schedulable.sort()

                if schedulable:
                    for key, name, expr in schedulable:
                        ordered_names_exprs.append((name, expr))
                        available_names.add(name)
                else:
                    # Nothing schedulable but work remains: dependency cycle.
                    raise RuntimeError("aggregation resulted in an impossible assignment")

            # do_not_return marks purely-internal temporaries that no other
            # instruction (and not the result) ever reads.
            return self.finalize_multi_assign(
                    names=[name for name, expr in ordered_names_exprs],
                    exprs=[expr for name, expr in ordered_names_exprs],
                    do_not_return=[Variable(name) not in externally_used_names
                        for name, expr in ordered_names_exprs],
                    priority=ass.priority)

        return [schedule_and_finalize_assignment(ass)
            for ass in processed_assigns] + other_insns
841