
from __future__ import annotations

__copyright__ = """Copyright (C) 2020 Matt Wala"""

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

import sys
import dataclasses
import islpy as isl
import loopy as lp
import pytools
import re
import pytato.scalar_expr as scalar_expr
import pymbolic.primitives as prim
from pymbolic import var

from typing import (
        Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set, Callable,
        Any, List)


from pytato.array import (Array, DictOfNamedArrays, ShapeType, IndexLambda,
        SizeParam, InputArgumentBase, Placeholder, NamedArray)

from pytato.target import BoundProgram
from pytato.target.loopy import LoopyPyOpenCLTarget, LoopyTarget
from pytato.transform import Mapper, WalkMapper
from pytato.scalar_expr import ScalarExpression
from pytato.codegen import preprocess, normalize_outputs, SymbolicIndex
from pytato.loopy import LoopyCall

# set in doc/conf.py
if getattr(sys, "PYTATO_BUILDING_SPHINX_DOCS", False):
    # Avoid the import unless building the docs, so as not to create a hard
    # dependency on pyopencl, which loopy can run fine without.
    import pyopencl

__doc__ = """
.. autoclass:: LoopyExpressionContext
.. autoclass:: ImplementedResult
.. autoclass:: StoredResult
.. autoclass:: InlinedResult
.. autoclass:: SubstitutionRuleResult
.. autoclass:: CodeGenState
.. autoclass:: CodeGenMapper
.. autoclass:: InlinedExpressionGenMapper

.. autofunction:: domain_for_shape
.. autofunction:: get_loopy_temporary
.. autofunction:: add_store
.. autofunction:: rename_reductions
.. autofunction:: normalize_outputs
.. autofunction:: get_initial_codegen_state
"""


def loopy_substitute(expression: Any, variable_assignments: Mapping[str, Any]) -> Any:
    from loopy.symbolic import SubstitutionMapper
    from pymbolic.mapper.substitutor import make_subst_func
    return SubstitutionMapper(make_subst_func(variable_assignments))(expression)
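

# Illustrative sketch (not executed; the expression and names below are made up
# for the example): loopy_substitute replaces free variables by name throughout
# a pymbolic expression, e.g. turning ``a + b[i]`` into ``x + b[j]``:
#
#     expr = var("a") + prim.Subscript(var("b"), (var("i"),))
#     loopy_substitute(expr, {"a": var("x"), "i": var("j")})  # -> x + b[j]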


# {{{ generated array expressions

# SymbolicIndex and ShapeType are semantically distinct but identical at the
# type level.
ReductionBounds = Dict[str, Tuple[ScalarExpression, ScalarExpression]]
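# For instance, a single reduction over an axis of (symbolic) length n might be
# described by the bounds dict ``{"_r0": (0, var("n"))}`` (illustrative only;
# bounds are half-open).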


@dataclasses.dataclass(init=True, repr=False, eq=False)
class LoopyExpressionContext(object):
    """Mutable state used while generating :mod:`loopy` expressions.
    Wraps :class:`CodeGenState` with more expression-specific information.

    This data is passed through :class:`InlinedExpressionGenMapper` via
    arguments, and is also used by
    :meth:`ImplementedResult.to_loopy_expression` to retrieve contextual data.

    .. attribute:: state

        The :class:`CodeGenState`.

    .. attribute:: local_namespace

        A (read-only) local name mapping used for name lookup when generating
        code.

    .. attribute:: num_indices

        The number of indices of the form ``_0``, ``_1``, ... allowed in the
        expression.

    .. attribute:: depends_on

        The set of statement IDs that need to be included in
        :attr:`loopy.InstructionBase.depends_on`.

    .. attribute:: reduction_bounds

        A mapping from inames to reduction bounds in the expression.

    .. automethod:: update_depends_on
    .. automethod:: lookup
    """
    state: CodeGenState
    num_indices: int

    _depends_on: FrozenSet[str] = \
            dataclasses.field(default_factory=frozenset)

    local_namespace: Mapping[str, Array] = \
            dataclasses.field(default_factory=dict)

    reduction_bounds: ReductionBounds = \
            dataclasses.field(default_factory=dict)

    def lookup(self, name: str) -> Array:
        return self.local_namespace[name]

    @property
    def depends_on(self) -> FrozenSet[str]:
        return self._depends_on

    def update_depends_on(self, other: FrozenSet[str]) -> None:
        self._depends_on = self._depends_on | other

class ImplementedResult(object):
    """Generated code for a node in the computation graph (i.e., an array
    expression).

    .. automethod:: to_loopy_expression
    """

    def to_loopy_expression(self, indices: SymbolicIndex,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        """Return a :mod:`loopy` expression for this result.

        :param indices: symbolic expressions for the indices of the array
        :param expr_context: the associated expression context. The fields are
            treated as follows:

            - *depends_on* is populated with any dependencies needed for the
              generated expression.
            - *reduction_bounds* is populated with reduction bounds for the
              reduction inames in the returned expression. If
              *reduction_bounds* is nonempty, then the returned inames are
              ensured to be disjoint from those present.
        """
        raise NotImplementedError

class StoredResult(ImplementedResult):
    """An array expression generated as a :mod:`loopy` array.

    See also: :class:`pytato.array.ImplStored`.
    """
    def __init__(self, name: str, num_indices: int,
            depends_on: FrozenSet[str]):
        self.name = name
        self.num_indices = num_indices
        self.depends_on = depends_on

    def to_loopy_expression(self, indices: SymbolicIndex,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        assert len(indices) == self.num_indices
        expr_context.update_depends_on(self.depends_on)
        if indices == ():
            return prim.Variable(self.name)
        else:
            return prim.Variable(self.name)[indices]

class InlinedResult(ImplementedResult):
    """An array expression generated as a :mod:`loopy` expression containing
    inlined sub-expressions.

    See also: :class:`pytato.array.ImplInlined`.
    """
    def __init__(self, expr: ScalarExpression,
            num_indices: int,
            reduction_bounds: ReductionBounds,
            depends_on: FrozenSet[str]):
        self.expr = expr
        self.num_indices = num_indices
        self.reduction_bounds = dict(reduction_bounds)
        self.depends_on = depends_on

    @staticmethod
    def from_loopy_expression(
            loopy_expr: ScalarExpression,
            loopy_expr_context: LoopyExpressionContext) -> InlinedResult:
        return InlinedResult(loopy_expr,
                loopy_expr_context.num_indices,
                loopy_expr_context.reduction_bounds,
                loopy_expr_context.depends_on)

    def to_loopy_expression(self, indices: SymbolicIndex,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        assert len(indices) == self.num_indices
        substitutions = {f"_{d}": i for d, i in enumerate(indices)}

        reduction_start = len(expr_context.reduction_bounds)

        # Rename reductions in expression not to conflict with those in
        # expr_context.
        for i, (old_name, bounds) in enumerate(self.reduction_bounds.items()):
            new_name = f"_r{i + reduction_start}"
            assert new_name not in expr_context.reduction_bounds
            substitutions[old_name] = var(new_name)
            expr_context.reduction_bounds[new_name] = bounds

        expr_context.update_depends_on(self.depends_on)

        return loopy_substitute(self.expr, substitutions)

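# Illustrative note (not part of the implementation): for indices ``(i, j)``,
# a ``StoredResult("tmp", 2, frozenset())`` produces the loopy expression
# ``tmp[i, j]``, whereas an ``InlinedResult`` holding, say, the scalar
# expression ``2*_0 + _1`` is inlined as ``2*i + j`` -- no intermediate array
# is referenced.
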
class SubstitutionRuleResult(ImplementedResult):
    # TODO: implement
    pass

# }}}


# {{{ codegen

@dataclasses.dataclass(init=True, repr=False, eq=False)
class CodeGenState:
    """A container for data kept by :class:`CodeGenMapper`.

    .. attribute:: _program

        The partial :class:`loopy.LoopKernel` or :class:`loopy.TranslationUnit`
        being built.

    .. attribute:: results

        A mapping from :class:`pytato.Array` instances to instances of
        :class:`ImplementedResult`.

    .. attribute:: var_name_gen
    .. attribute:: insn_id_gen

    .. automethod:: update_kernel
    """
    _program: Union["lp.TranslationUnit", lp.LoopKernel]
    results: Dict[Array, ImplementedResult]

    var_name_gen: pytools.UniqueNameGenerator = dataclasses.field(init=False)
    insn_id_gen: pytools.UniqueNameGenerator = dataclasses.field(init=False)

    def __post_init__(self) -> None:
        if isinstance(self._program, lp.LoopKernel):
            self.var_name_gen = self._program.get_var_name_generator()
            self.insn_id_gen = self._program.get_instruction_id_generator()
        else:
            self.var_name_gen = (
                    self._program["_pt_kernel"].get_var_name_generator())
            self.insn_id_gen = (
                    self._program["_pt_kernel"].get_instruction_id_generator())

    @property
    def program(self) -> Union["lp.TranslationUnit", lp.LoopKernel]:
        return self._program

    @property
    def kernel(self) -> lp.LoopKernel:
        """
        Returns the entry kernel of the loopy program being built.
        """
        if isinstance(self._program, lp.LoopKernel):
            return self._program
        else:
            return self._program["_pt_kernel"]

    def update_kernel(self, kernel: lp.LoopKernel) -> None:
        if isinstance(self._program, lp.LoopKernel):
            self._program = kernel
        else:
            self._program = self._program.with_kernel(kernel)

    def update_program(self, program: "lp.TranslationUnit") -> None:
        self._program = program

class CodeGenMapper(Mapper):
    """A mapper for generating code for nodes in the computation graph.
    """
    exprgen_mapper: InlinedExpressionGenMapper

    def __init__(self) -> None:
        self.exprgen_mapper = InlinedExpressionGenMapper(self)

    def map_size_param(self, expr: SizeParam,
            state: CodeGenState) -> ImplementedResult:
        if expr in state.results:
            return state.results[expr]

        arg = lp.ValueArg(expr.name, dtype=expr.dtype)
        kernel = state.kernel.copy(args=state.kernel.args + [arg])
        state.update_kernel(kernel)

        assert expr.name is not None
        result = StoredResult(expr.name, expr.ndim, frozenset())
        state.results[expr] = result
        return result

    def map_placeholder(self, expr: Placeholder,
            state: CodeGenState) -> ImplementedResult:
        if expr in state.results:
            return state.results[expr]

        shape = shape_to_scalar_expression(expr.shape, self, state)
        arg = lp.GlobalArg(expr.name,
                shape=shape,
                dtype=expr.dtype,
                order="C",
                is_input=True,
                is_output=False)
        kernel = state.kernel.copy(args=state.kernel.args + [arg])
        state.update_kernel(kernel)

        assert expr.name is not None
        result = StoredResult(expr.name, expr.ndim, frozenset())
        state.results[expr] = result
        return result

    def map_index_lambda(self, expr: IndexLambda,
            state: CodeGenState) -> ImplementedResult:
        if expr in state.results:
            return state.results[expr]

        # TODO: Respect tags.

        loopy_expr_context = LoopyExpressionContext(state,
                local_namespace=expr.bindings,
                num_indices=expr.ndim)
        loopy_expr = self.exprgen_mapper(expr.expr, loopy_expr_context)

        result = InlinedResult.from_loopy_expression(loopy_expr,
                loopy_expr_context)
        state.results[expr] = result

        shape_to_scalar_expression(expr.shape, self, state)  # walk over size params

        return result

    def map_dict_of_named_arrays(self, expr: DictOfNamedArrays,
            state: CodeGenState) -> None:
        for key in expr:
            subexpr = expr[key].expr
            name = state.var_name_gen("_pt_temp")
            insn_id = add_store(name, subexpr, self.rec(subexpr, state),
                    state, output_to_temporary=True, cgen_mapper=self)
            state.results[subexpr] = state.results[expr[key]] = (
                    StoredResult(name, subexpr.ndim, frozenset([insn_id])))

    def map_named_array(self, expr: NamedArray,
            state: CodeGenState) -> ImplementedResult:
        if expr in state.results:
            return state.results[expr]

        self.rec(expr._container, state)

        assert expr in state.results
        return state.results[expr]

    def map_loopy_call(self, expr: LoopyCall,
            state: CodeGenState) -> None:
        from loopy.kernel.instruction import make_assignment
        from loopy.symbolic import SubArrayRef

        callee_kernel = expr.translation_unit[expr.entrypoint]

        state.update_program(lp.merge([state.program, expr.translation_unit]))

        domains = []

        def _get_sub_array_ref(array: Array,
                name: str) -> "lp.symbolic.SubArrayRef":
            inames = tuple(
                    state.var_name_gen(f"_{name}_dim{d}")
                    for d in range(array.ndim))

            domains.append(domain_for_shape(inames,
                    shape_to_scalar_expression(array.shape, self, state),
                    {}))

            inames_as_vars = tuple(var(iname) for iname in inames)
            return SubArrayRef(inames_as_vars,
                    prim.Subscript(var(name), inames_as_vars))

        assignees = []
        params = []
        depends_on: Set[str] = set()
        new_tvs = {}
        new_insn_id = state.insn_id_gen(f"call_{callee_kernel.name}")

        for arg in callee_kernel.args:
            # must traverse in the order of callee's args to generate the
            # correct assignees order
            if isinstance(arg, lp.ArrayArg):
                if arg.is_output:
                    assignee_name = state.var_name_gen("_pt_temp")
                    assignees.append(_get_sub_array_ref(expr[arg.name],
                                                        assignee_name))
                    named_array = expr[arg.name]

                    # stored result for the assignee
                    result = StoredResult(assignee_name, named_array.ndim,
                                          frozenset([new_insn_id]))
                    # record the result for the corresponding loopy array
                    state.results[named_array] = result

                    new_tvs[assignee_name] = get_loopy_temporary(assignee_name,
                                                                 named_array,
                                                                 self, state)
                else:
                    assert arg.is_input
                    pt_arg = expr.bindings[arg.name]
                    assert isinstance(pt_arg, Array)

                    pt_arg_rec = self.rec(pt_arg, state)

                    if isinstance(pt_arg_rec, StoredResult):
                        # found a stored result corresponding to the argument,
                        # use it
                        name = pt_arg_rec.name
                        params.append(_get_sub_array_ref(pt_arg, name))
                        depends_on.update(pt_arg_rec.depends_on)
                    else:
                        # did not find a stored result for the sub-expression,
                        # store it and then pass it to the call
                        name = state.var_name_gen("_pt_temp")
                        store_insn_id = add_store(name, pt_arg,
                                pt_arg_rec,
                                state, output_to_temporary=True,
                                cgen_mapper=self)
                        depends_on.add(store_insn_id)

                        # replace "arg" with the created stored variable
                        state.results[pt_arg] = StoredResult(name, pt_arg.ndim,
                                frozenset([store_insn_id]))
                        params.append(_get_sub_array_ref(pt_arg, name))
                        new_tvs[name] = get_loopy_temporary(name, pt_arg,
                                                            self, state)
            else:
                assert isinstance(arg, lp.ValueArg) and arg.is_input
                pt_arg = expr.bindings[arg.name]
                loopy_expr_context = LoopyExpressionContext(state,
                        local_namespace={}, num_indices=0)

                if isinstance(pt_arg, Array):
                    assert pt_arg.ndim == 0
                    params.append(self.rec(pt_arg, state).to_loopy_expression(
                            (), loopy_expr_context))
                else:
                    params.append(self.exprgen_mapper(pt_arg,
                            loopy_expr_context))

        new_insn = make_assignment(
                tuple(assignees),
                var(expr.entrypoint)(*params),
                depends_on=frozenset(depends_on),
                id=new_insn_id)

        # update kernel
        kernel = state.kernel
        tvs = state.kernel.temporary_variables.copy()
        tvs.update(new_tvs)

        kernel = kernel.copy(instructions=kernel.instructions+[new_insn],
                             temporary_variables=tvs,
                             domains=kernel.domains+domains)

        state.update_kernel(kernel)

# }}}


# {{{ inlined expression gen mapper

ELWISE_INDEX_RE = re.compile("_(0|([1-9][0-9]*))")
REDUCTION_INDEX_RE = re.compile("_r(0|([1-9][0-9]*))")

# Maps Pytato reduction types to the corresponding Loopy reduction types.
PYTATO_REDUCTION_TO_LOOPY_REDUCTION = {
    "sum": "sum",
    "product": "product",
    "max": "max",
    "min": "min",
}

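# Illustrative note (not part of the module logic): ELWISE_INDEX_RE fully
# matches element-wise index placeholders such as "_0" or "_12" but not
# zero-padded names like "_01"; REDUCTION_INDEX_RE does the same for reduction
# inames "_r0", "_r1", ...
#
#     assert ELWISE_INDEX_RE.fullmatch("_3").group(1) == "3"
#     assert ELWISE_INDEX_RE.fullmatch("_01") is None
#     assert REDUCTION_INDEX_RE.fullmatch("_r10") is not None
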
class InlinedExpressionGenMapper(scalar_expr.IdentityMapper):
    """A mapper for generating :mod:`loopy` expressions with inlined
    sub-expressions.

    The inputs to this mapper are scalar expressions as found in
    :class:`pytato.array.IndexLambda`, or expressions that are compatible
    (e.g., shape expressions).

    The outputs of this mapper are scalar expressions suitable for wrapping in
    :class:`InlinedResult`.
    """
    codegen_mapper: CodeGenMapper

    def __init__(self, codegen_mapper: CodeGenMapper):
        self.codegen_mapper = codegen_mapper

    def __call__(self, expr: ScalarExpression,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        return self.rec(expr, expr_context)

    def map_subscript(self, expr: prim.Subscript,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        assert isinstance(expr.aggregate, prim.Variable)
        result: ImplementedResult = self.codegen_mapper(
                expr_context.lookup(expr.aggregate.name), expr_context.state)
        return result.to_loopy_expression(self.rec(expr.index, expr_context),
                                          expr_context)

    def map_variable(self, expr: prim.Variable,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        elw_match = ELWISE_INDEX_RE.fullmatch(expr.name)
        redn_match = REDUCTION_INDEX_RE.fullmatch(expr.name)
        if elw_match:
            # Found an index of the form _0, _1, ...
            index = int(elw_match.group(1))
            if not (0 <= index < expr_context.num_indices):
                raise ValueError(f"invalid index encountered: _{index}")
            return expr
        elif redn_match:
            if expr.name not in expr_context.reduction_bounds:
                raise ValueError(f"invalid index encountered: '{expr}'.")
            return expr
        else:
            array = expr_context.lookup(expr.name)
            impl_result: ImplementedResult = self.codegen_mapper(array,
                    expr_context.state)
            return impl_result.to_loopy_expression((), expr_context)

    def map_call(self, expr: prim.Call,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        if isinstance(expr.function, prim.Variable) and (
                expr.function.name.startswith("pytato.c99.")):
            name_in_loopy = expr.function.name[11:]
            return prim.Call(prim.Variable(name_in_loopy),
                    self.rec(expr.parameters, expr_context))

        return super().map_call(expr, expr_context)

    def map_reduce(self, expr: scalar_expr.Reduce,
            expr_context: LoopyExpressionContext) -> ScalarExpression:
        from loopy.symbolic import Reduction as LoopyReduction
        state = expr_context.state

        unique_names_mapping = {
                old_name: prim.Variable(
                    state.var_name_gen(f"_pt_{expr.op}" + old_name))
                for old_name in expr.bounds}

        inner_expr = self.rec(expr.inner_expr,
                LoopyExpressionContext(
                    state=state,
                    _depends_on=expr_context.depends_on,
                    local_namespace=expr_context.local_namespace,
                    num_indices=expr_context.num_indices,
                    reduction_bounds=expr.bounds))  # type: ignore
        inner_expr = loopy_substitute(inner_expr, unique_names_mapping)

        try:
            loopy_redn = PYTATO_REDUCTION_TO_LOOPY_REDUCTION[expr.op]
        except KeyError:
            raise NotImplementedError(expr.op)

        inner_expr = LoopyReduction(loopy_redn,
                tuple(v.name for v in unique_names_mapping.values()),
                inner_expr)

        domain = domain_for_shape((), shape=(), reductions={
                unique_names_mapping[redn_iname].name:
                    self.rec(bounds, expr_context)
                for redn_iname, bounds in expr.bounds.items()})
        kernel = state.kernel
        state.update_kernel(kernel.copy(domains=kernel.domains+[domain]))

        return inner_expr

# }}}


# {{{ utils

def shape_to_scalar_expression(shape: ShapeType,
                               cgen_mapper: CodeGenMapper,
                               state: CodeGenState
                               ) -> Tuple[ScalarExpression, ...]:
    shape_context = LoopyExpressionContext(state, num_indices=0)
    result: List[ScalarExpression] = []
    for component in shape:
        if isinstance(component, int):
            result.append(component)
        else:
            assert isinstance(component, Array)
            result.append(
                    cgen_mapper(component, state).to_loopy_expression(
                        (), shape_context))

    return tuple(result)

def domain_for_shape(dim_names: Tuple[str, ...],
         shape: Tuple[ScalarExpression, ...],
         reductions: Dict[str, Tuple[ScalarExpression, ScalarExpression]],
         ) -> isl.BasicSet:  # noqa
    """Create an :class:`islpy.BasicSet` that expresses an appropriate index
    domain for an array of (potentially symbolic) shape *shape* having
    reduction dimensions *reductions*.

    :param dim_names: A tuple of strings, the names of the axes. These become
        set dimensions in the returned domain.

    :param shape: A tuple of constant or quasi-affine :mod:`pymbolic`
        expressions. The variables in these expressions become parameter
        dimensions in the returned set. Must have the same length as
        *dim_names*.

    :param reductions: A map from reduction inames to (lower, upper) bounds
        (as half-open integer ranges). The variables in the bounds become
        parameter dimensions in the returned set.
    """
    assert len(dim_names) == len(shape)

    # Collect parameters.
    param_names_set: Set[str] = set()
    for sdep in map(scalar_expr.get_dependencies, shape):
        param_names_set |= sdep

    for bounds in reductions.values():
        for sdep in map(scalar_expr.get_dependencies, bounds):
            # FIXME: Assumes that reduction bounds are not data-dependent.
            param_names_set |= sdep

    set_names = sorted(tuple(dim_names) + tuple(reductions))
    param_names = sorted(param_names_set)

    # Build domain.
    dom = isl.BasicSet.universe(
            isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
                set=set_names,
                params=param_names))

    # Add constraints.
    from loopy.symbolic import aff_from_expr
    affs = isl.affs_from_space(dom.space)

    for iname, dim in zip(dim_names, shape):
        dom &= affs[0].le_set(affs[iname])
        dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))

    for iname, (left, right) in reductions.items():
        dom &= aff_from_expr(dom.space, left).le_set(affs[iname])
        dom &= affs[iname].lt_set(aff_from_expr(dom.space, right))

    doms = dom.get_basic_sets()

    if len(doms) == 0:
        # empty set
        dom = isl.BasicSet.empty(dom.get_space())
    else:
        dom, = doms

    return dom

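# A small illustrative sketch (output shown only approximately; the exact ISL
# string depends on the islpy version): a 2D array with a symbolic leading
# dimension and one reduction iname yields a domain roughly equivalent to
# ``[n] -> { [i, j, r] : 0 <= i < n and 0 <= j < 256 and 0 <= r < 8 }``.
#
#     dom = domain_for_shape(("i", "j"), (var("n"), 256), {"r": (0, 8)})
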
def add_store(name: str, expr: Array, result: ImplementedResult,
        state: CodeGenState, cgen_mapper: CodeGenMapper,
        output_to_temporary: bool = False) -> str:
    """Add an instruction that stores to a variable in the kernel.

    :param name: name of the output array, which is created
    :param expr: the :class:`~pytato.Array` to store
    :param result: the corresponding :class:`ImplementedResult`
    :param state: code generation state
    :param cgen_mapper: the :class:`CodeGenMapper` driving code generation
    :param output_to_temporary: whether to store into a temporary variable
        (by default, a global output argument is generated instead)

    :returns: the id of the generated instruction
    """
    # Get expression.
    inames = tuple(
            state.var_name_gen(f"{name}_dim{d}")
            for d in range(expr.ndim))
    indices = tuple(prim.Variable(iname) for iname in inames)
    loopy_expr_context = LoopyExpressionContext(state, num_indices=0)
    loopy_expr = result.to_loopy_expression(indices, loopy_expr_context)

    # Rename reduction variables to names suitable as inames.
    loopy_expr = rename_reductions(
            loopy_expr, loopy_expr_context,
            lambda old_name: state.var_name_gen(f"{name}{old_name}"))

    # Make the instruction
    from loopy.kernel.instruction import make_assignment
    if indices:
        assignee = prim.Variable(name)[indices]
    else:
        assignee = prim.Variable(name)
    insn_id = state.insn_id_gen(f"{name}_store")
    insn = make_assignment((assignee,),
            loopy_expr,
            id=insn_id,
            within_inames=frozenset(inames),
            depends_on=loopy_expr_context.depends_on)

    shape = shape_to_scalar_expression(expr.shape, cgen_mapper, state)

    # Get the domain.
    domain = domain_for_shape(inames, shape,
            loopy_expr_context.reduction_bounds)

    # Update the kernel.
    kernel = state.kernel

    if output_to_temporary:
        tvar = get_loopy_temporary(name, expr, cgen_mapper, state)
        temporary_variables = kernel.temporary_variables.copy()
        temporary_variables[name] = tvar
        kernel = kernel.copy(temporary_variables=temporary_variables,
                domains=kernel.domains + [domain],
                instructions=kernel.instructions + [insn])
    else:
        arg = lp.GlobalArg(name,
                shape=shape,
                dtype=expr.dtype,
                order="C",
                is_input=False,
                is_output=True)
        kernel = kernel.copy(args=kernel.args + [arg],
                domains=kernel.domains + [domain],
                instructions=kernel.instructions + [insn])

    state.update_kernel(kernel)
    return insn_id

def get_loopy_temporary(name: str, expr: Array,
                        cgen_mapper: CodeGenMapper,
                        state: CodeGenState) -> lp.TemporaryVariable:
    # always allocating to global address space to avoid stack overflow
    address_space = lp.AddressSpace.GLOBAL
    return lp.TemporaryVariable(name,
            shape=shape_to_scalar_expression(expr.shape, cgen_mapper, state),
            dtype=expr.dtype,
            address_space=address_space)

def rename_reductions(
        loopy_expr: ScalarExpression,
        loopy_expr_context: LoopyExpressionContext,
        var_name_gen: Callable[[str], str]) -> ScalarExpression:
    """Rename the reduction variables in *loopy_expr* and *loopy_expr_context*
    using the callable *var_name_gen*.
    """
    new_reduction_inames = tuple(
            var_name_gen(old_iname)
            for old_iname in loopy_expr_context.reduction_bounds)

    substitutions = dict(zip(
            loopy_expr_context.reduction_bounds,
            map(var, new_reduction_inames)))

    result = loopy_substitute(loopy_expr, substitutions)

    new_reduction_bounds = {
            substitutions[old_iname].name: bounds
            for old_iname, bounds in loopy_expr_context.reduction_bounds.items()}

    loopy_expr_context.reduction_bounds = new_reduction_bounds

    return result

# }}}

# {{{ generate_loopy

def get_initial_codegen_state(target: LoopyTarget,
        options: lp.Options) -> CodeGenState:
    kernel = lp.make_kernel("{:}", [],
            name="_pt_kernel",
            target=target.get_loopy_target(),
            options=options,
            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)

    return CodeGenState(_program=kernel,
            results=dict())

class InputNameRecorder(WalkMapper):
    def __init__(self, state: CodeGenState) -> None:
        super().__init__()
        self.state = state
        self.already_visited: Set[InputArgumentBase] = set()

    def post_visit(self, expr: Any) -> None:
        if (isinstance(expr, InputArgumentBase)
                and expr not in self.already_visited):
            assert expr.name is not None
            self.state.var_name_gen.add_names([expr.name])
            self.already_visited.add(expr)

def generate_loopy(result: Union[Array, DictOfNamedArrays, Dict[str, Array]],
        target: Optional[LoopyTarget] = None,
        options: Optional[lp.Options] = None,
        *,
        cl_device: Optional["pyopencl.Device"] = None) -> BoundProgram:
    r"""Code generation entry point.

    :param result: Outputs of the computation.
    :param target: Code generation target.
    :param options: Code generation options for the kernel.
    :param cl_device: The :class:`pyopencl.Device` to target; may only be
        passed when *target* is not supplied.
    :returns: A :class:`pytato.target.BoundProgram` wrapping the generated
        :mod:`loopy` program.

    If *result* is a :class:`dict` or a :class:`pytato.DictOfNamedArrays` and
    *options* is not supplied, then the Loopy option
    :attr:`~loopy.Options.return_dict` will be set to *True*.
    """
    result_is_dict = isinstance(result, (dict, DictOfNamedArrays))
    orig_outputs: DictOfNamedArrays = normalize_outputs(result)
    del result

    if target is None:
        target = LoopyPyOpenCLTarget(device=cl_device)
    else:
        if cl_device is not None:
            raise TypeError("may not pass both 'target' and 'cl_device'")

    preproc_result = preprocess(orig_outputs, target)
    outputs = preproc_result.outputs
    compute_order = preproc_result.compute_order

    if options is None and result_is_dict:
        options = lp.Options(return_dict=True)

    state = get_initial_codegen_state(target, options)

    input_name_recorder = InputNameRecorder(state)
    for name in compute_order:
        expr = outputs[name].expr
        # Reserve names of input and output arguments.
        input_name_recorder(expr)

    state.var_name_gen.add_names(outputs)

    cg_mapper = CodeGenMapper()

    # Generate code for outputs.
    for name in compute_order:
        expr = outputs[name].expr
        insn_id = add_store(name, expr, cg_mapper(expr, state),
                state, cg_mapper)
        # replace "expr" with the created stored variable
        state.results[expr] = StoredResult(name, expr.ndim,
                                           frozenset([insn_id]))

    # Why call make_reduction_inames_unique?
    # Consider pt.generate_loopy(pt.sum(x) + pt.sum(x)): the generated program
    # would be a single instruction with the rhs `_pt_subst() + _pt_subst()`.
    # Since the result of pt.sum(x) is cached, the same InlinedResult instance
    # is emitted for both invocations, so the resulting reduction iname
    # collisions must be resolved here.
    program = lp.make_reduction_inames_unique(state.program)

    return target.bind_program(
            program=program,
            bound_arguments=preproc_result.bound_arguments)

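# A minimal usage sketch (hedged: the exact pt.make_placeholder signature may
# differ between pytato versions; the names below are illustrative):
#
#     import numpy as np
#     import pytato as pt
#
#     x = pt.make_placeholder(name="x", shape=(20, 20), dtype=np.float64)
#     prog = pt.generate_loopy(2*x + 1)   # -> pytato.target.BoundProgram
#     print(prog.program)                 # inspect the generated loopy code
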
# }}}

# vim:fdm=marker