Source code for pytential.qbx

__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

import numpy as np
import pyopencl as cl

from arraycontext import PyOpenCLArrayContext, freeze, thaw
from meshmode.dof_array import flatten, unflatten

from pytools import memoize_method, memoize_in, single_valued
from pytential.qbx.target_assoc import QBXTargetAssociationFailedException
from pytential.source import LayerPotentialSourceBase

import logging
logger = logging.getLogger(__name__)


__doc__ = """
.. autoclass:: QBXLayerPotentialSource

.. autoclass:: QBXTargetAssociationFailedException
"""


# {{{ QBX layer potential source

class _not_provided:  # noqa: N801
    pass


[docs]class QBXLayerPotentialSource(LayerPotentialSourceBase):
    """A source discretization for a QBX layer potential.

    .. attribute :: qbx_order
    .. attribute :: fmm_order

    .. automethod :: __init__
    .. automethod :: copy

    See :ref:`qbxguts` for some information on the inner workings of this.
    """

    # {{{ constructor / copy

[docs]    def __init__(self,
            density_discr,
            fine_order,
            qbx_order=None,
            fmm_order=None,
            fmm_level_to_order=None,
            expansion_factory=None,
            target_association_tolerance=_not_provided,

            # begin experimental arguments
            # FIXME default debug=False once everything has matured
            debug=True,
            _disable_refinement=False,
            _expansions_in_tree_have_extent=True,
            _expansion_stick_out_factor=0.5,
            _well_sep_is_n_away=2,
            _max_leaf_refine_weight=None,
            _box_extent_norm=None,
            _from_sep_smaller_crit=None,
            _from_sep_smaller_min_nsources_cumul=None,
            _tree_kind="adaptive",
            _use_target_specific_qbx=None,
            geometry_data_inspector=None,
            cost_model=None,
            fmm_backend="sumpy",
            target_stick_out_factor=_not_provided):
        """
        :arg fine_order: The total degree to which the (upsampled)
             underlying quadrature is exact.
        :arg fmm_order: `False` for direct calculation. May not be given if
            *fmm_level_to_order* is given.
        :arg fmm_level_to_order: A function that takes arguments of
             *(kernel, kernel_args, tree, level)* and returns the expansion
             order to be used on a given *level* of *tree* with *kernel*, where
             *kernel* is the :class:`sumpy.kernel.Kernel` being evaluated, and
             *kernel_args* is a set of *(key, value)* tuples with evaluated
             kernel arguments. May not be given if *fmm_order* is given.

        Experimental arguments without a promise of forward compatibility:

        :arg _use_target_specific_qbx: Whether to use target-specific
            acceleration by default if possible. *None* means
            "use if possible".
        :arg cost_model: Either *None* or an object implementing the
             :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for
             gathering modeled costs if provided (experimental)
        """

        # {{{ argument processing

        if fine_order is None:
            raise ValueError("fine_order must be provided.")

        if qbx_order is None:
            raise ValueError("qbx_order must be provided.")

        if target_stick_out_factor is not _not_provided:
            from warnings import warn
            warn("target_stick_out_factor has been renamed to "
                    "target_association_tolerance. "
                    "Using target_stick_out_factor is deprecated "
                    "and will stop working in 2018.",
                    DeprecationWarning, stacklevel=2)

            if target_association_tolerance is not _not_provided:
                raise TypeError("May not pass both target_association_tolerance and "
                        "target_stick_out_factor.")

            target_association_tolerance = target_stick_out_factor

        del target_stick_out_factor

        if target_association_tolerance is _not_provided:
            target_association_tolerance = float(
                    np.finfo(density_discr.real_dtype).eps) * 1e3

        if fmm_order is not None and fmm_level_to_order is not None:
            raise TypeError("may not specify both fmm_order and fmm_level_to_order")

        if _box_extent_norm is None:
            _box_extent_norm = "l2"

        if _from_sep_smaller_crit is None:
            # This seems to win no matter what the box extent norm is
            # https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/10
            _from_sep_smaller_crit = "precise_linf"

        if fmm_level_to_order is None:
            if fmm_order is False:
                fmm_level_to_order = False
            else:
                def fmm_level_to_order(kernel, kernel_args, tree, level):  # noqa pylint:disable=function-redefined
                    return fmm_order

        if _max_leaf_refine_weight is None:
            if density_discr.ambient_dim == 2:
                # FIXME: This should be verified now that l^2 is the default.
                _max_leaf_refine_weight = 64
            elif density_discr.ambient_dim == 3:
                # For static_linf/linf: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/8#note_25009  # noqa
                # For static_l2/l2: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/12  # noqa
                _max_leaf_refine_weight = 512
            else:
                # Just guessing...
                _max_leaf_refine_weight = 64

        if _from_sep_smaller_min_nsources_cumul is None:
            # See here for the comment thread that led to these defaults:
            # https://gitlab.tiker.net/inducer/boxtree/merge_requests/28#note_18661
            if density_discr.dim == 1:
                _from_sep_smaller_min_nsources_cumul = 15
            else:
                _from_sep_smaller_min_nsources_cumul = 30

        # }}}

        LayerPotentialSourceBase.__init__(self, density_discr)

        self.fine_order = fine_order
        self.qbx_order = qbx_order
        self.fmm_level_to_order = fmm_level_to_order

        assert target_association_tolerance is not None

        self.target_association_tolerance = target_association_tolerance
        self.fmm_backend = fmm_backend

        if expansion_factory is None:
            from sumpy.expansion import DefaultExpansionFactory
            expansion_factory = DefaultExpansionFactory()
        self.expansion_factory = expansion_factory

        self.debug = debug
        self._disable_refinement = _disable_refinement
        self._expansions_in_tree_have_extent = \
                _expansions_in_tree_have_extent
        self._expansion_stick_out_factor = _expansion_stick_out_factor
        self._well_sep_is_n_away = _well_sep_is_n_away
        self._max_leaf_refine_weight = _max_leaf_refine_weight
        self._box_extent_norm = _box_extent_norm
        self._from_sep_smaller_crit = _from_sep_smaller_crit
        self._from_sep_smaller_min_nsources_cumul = \
                _from_sep_smaller_min_nsources_cumul
        self._tree_kind = _tree_kind
        self._use_target_specific_qbx = _use_target_specific_qbx
        self.geometry_data_inspector = geometry_data_inspector

        if cost_model is None:
            from pytential.qbx.cost import QBXCostModel
            cost_model = QBXCostModel()

        self.cost_model = cost_model

        # /!\ *All* parameters set here must also be set by copy() below,
        # otherwise they will be reset to their default values behind your
        # back if the layer potential source is ever copied. (such as
        # during refinement)

[docs]    def copy(
            self,
            density_discr=None,
            fine_order=None,
            qbx_order=None,
            fmm_order=_not_provided,
            fmm_level_to_order=_not_provided,
            expansion_factory=None,
            target_association_tolerance=_not_provided,
            _expansions_in_tree_have_extent=_not_provided,
            _expansion_stick_out_factor=_not_provided,
            _max_leaf_refine_weight=None,
            _box_extent_norm=None,
            _from_sep_smaller_crit=None,
            _tree_kind=None,
            _use_target_specific_qbx=_not_provided,
            geometry_data_inspector=None,
            cost_model=_not_provided,
            fmm_backend=None,

            debug=_not_provided,
            _disable_refinement=_not_provided,
            target_stick_out_factor=_not_provided,
            ):

        # {{{ argument processing

        if target_stick_out_factor is not _not_provided:
            from warnings import warn
            warn("target_stick_out_factor has been renamed to "
                    "target_association_tolerance. "
                    "Using target_stick_out_factor is deprecated "
                    "and will stop working in 2018.",
                    DeprecationWarning, stacklevel=2)

            if target_association_tolerance is not _not_provided:
                raise TypeError("May not pass both target_association_tolerance and "
                        "target_stick_out_factor.")

            target_association_tolerance = target_stick_out_factor

        elif target_association_tolerance is _not_provided:
            target_association_tolerance = self.target_association_tolerance

        del target_stick_out_factor

        # }}}

        kwargs = {}

        if (fmm_order is not _not_provided
                and fmm_level_to_order is not _not_provided):
            raise TypeError("may not specify both fmm_order and fmm_level_to_order")
        elif fmm_order is not _not_provided:
            kwargs["fmm_order"] = fmm_order
        elif fmm_level_to_order is not _not_provided:
            kwargs["fmm_level_to_order"] = fmm_level_to_order
        else:
            kwargs["fmm_level_to_order"] = self.fmm_level_to_order

        # FIXME Could/should share wrangler and geometry kernels
        # if no relevant changes have been made.
        return QBXLayerPotentialSource(
                density_discr=density_discr or self.density_discr,
                fine_order=(
                    fine_order if fine_order is not None else self.fine_order),
                qbx_order=qbx_order if qbx_order is not None else self.qbx_order,

                target_association_tolerance=target_association_tolerance,
                expansion_factory=(
                    expansion_factory or self.expansion_factory),

                debug=(
                    # False is a valid value here
                    debug if debug is not _not_provided else self.debug),
                _disable_refinement=(
                    # False is a valid value here
                    _disable_refinement
                    if _disable_refinement is not _not_provided
                    else self._disable_refinement),
                _expansions_in_tree_have_extent=(
                    # False is a valid value here
                    _expansions_in_tree_have_extent
                    if _expansions_in_tree_have_extent is not _not_provided
                    else self._expansions_in_tree_have_extent),
                _expansion_stick_out_factor=(
                    # 0 is a valid value here
                    _expansion_stick_out_factor
                    if _expansion_stick_out_factor is not _not_provided
                    else self._expansion_stick_out_factor),
                _well_sep_is_n_away=self._well_sep_is_n_away,
                _max_leaf_refine_weight=(
                    _max_leaf_refine_weight or self._max_leaf_refine_weight),
                _box_extent_norm=(_box_extent_norm or self._box_extent_norm),
                _from_sep_smaller_crit=(
                    _from_sep_smaller_crit or self._from_sep_smaller_crit),
                _from_sep_smaller_min_nsources_cumul=(
                    self._from_sep_smaller_min_nsources_cumul),
                _tree_kind=_tree_kind or self._tree_kind,
                _use_target_specific_qbx=(_use_target_specific_qbx
                    if _use_target_specific_qbx is not _not_provided
                    else self._use_target_specific_qbx),
                geometry_data_inspector=(
                    geometry_data_inspector or self.geometry_data_inspector),
                cost_model=(
                    # None is a valid value here
                    cost_model
                    if cost_model is not _not_provided
                    else self.cost_model),
                fmm_backend=fmm_backend or self.fmm_backend,
                **kwargs)

    # }}}

    # {{{ code containers

    @property
    def tree_code_container(self):
        @memoize_in(self._setup_actx, (
                QBXLayerPotentialSource, "tree_code_container"))
        def make_container():
            from pytential.qbx.utils import TreeCodeContainer
            return TreeCodeContainer(self._setup_actx)

        return make_container()

    @property
    def refiner_code_container(self):
        @memoize_in(self._setup_actx, (
                QBXLayerPotentialSource, "refiner_code_container"))
        def make_container():
            from pytential.qbx.refinement import RefinerCodeContainer
            return RefinerCodeContainer(
                    self._setup_actx, self.tree_code_container)

        return make_container()

    @property
    def target_association_code_container(self):
        @memoize_in(self._setup_actx, (
                QBXLayerPotentialSource, "target_association_code_container"))
        def make_container():
            from pytential.qbx.target_assoc import TargetAssociationCodeContainer
            return TargetAssociationCodeContainer(
                    self._setup_actx, self.tree_code_container)

        return make_container()

    @property
    def qbx_fmm_geometry_data_code_container(self):
        @memoize_in(self._setup_actx, (
                QBXLayerPotentialSource, "qbx_fmm_geometry_data_code_container"))
        def make_container(
                debug, ambient_dim, well_sep_is_n_away,
                from_sep_smaller_crit):
            from pytential.qbx.geometry import QBXFMMGeometryDataCodeContainer
            return QBXFMMGeometryDataCodeContainer(
                    self._setup_actx,
                    ambient_dim, self.tree_code_container, debug,
                    _well_sep_is_n_away=well_sep_is_n_away,
                    _from_sep_smaller_crit=from_sep_smaller_crit)

        return make_container(
                self.debug, self.ambient_dim,
                self._well_sep_is_n_away, self._from_sep_smaller_crit)

    # }}}

    # {{{ internal API

    @memoize_method
    def qbx_fmm_geometry_data(self, places, name,
            target_discrs_and_qbx_sides):
        """
        :arg target_discrs_and_qbx_sides:
            a tuple of *(discr, qbx_forced_limit)*
            tuples, where *discr* is a
            :class:`meshmode.discretization.Discretization`
            or
            :class:`pytential.target.TargetBase`
            instance
        """
        from pytential.qbx.geometry import QBXFMMGeometryData

        return QBXFMMGeometryData(places, name,
                self.qbx_fmm_geometry_data_code_container,
                target_discrs_and_qbx_sides,
                target_association_tolerance=self.target_association_tolerance,
                tree_kind=self._tree_kind,
                debug=self.debug)

    # }}}

    # {{{ helpers for symbolic operator processing

    def preprocess_optemplate(self, name, discretizations, expr):
        """
        :arg name: The symbolic name for *self*, which the preprocessor
            should use to find which expressions it is allowed to modify.
        """
        from pytential.symbolic.mappers import QBXPreprocessor
        return QBXPreprocessor(name, discretizations)(expr)

    def op_group_features(self, expr):
        from pytential.utils import sort_arrays_together
        result = (
                expr.source, *sort_arrays_together(expr.source_kernels,
                expr.densities, key=str)
                )

        return result

    # }}}

    # {{{ internal functionality for execution

    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
            return_timing_data):
        extra_args = {}

        if self.fmm_level_to_order is False:
            func = self.exec_compute_potential_insn_direct
            extra_args["return_timing_data"] = return_timing_data

        else:
            func = self.exec_compute_potential_insn_fmm

            def drive_fmm(wrangler, strengths, geo_data, kernel, kernel_arguments):
                del geo_data, kernel, kernel_arguments
                from pytential.qbx.fmm import drive_fmm
                if return_timing_data:
                    timing_data = {}
                else:
                    timing_data = None
                return drive_fmm(wrangler, strengths, timing_data), timing_data

            extra_args["fmm_driver"] = drive_fmm

        return self._dispatch_compute_potential_insn(
                actx, insn, bound_expr, evaluate, func, extra_args)

    def cost_model_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
                                          calibration_params, per_box):
        """Using :attr:`cost_model`, evaluate the cost of executing *insn*.
        Cost model results are gathered in
        :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost`
        along the way.

        :arg calibration_params: a :class:`dict` of calibration parameters, mapping
            from parameter names to calibration values.
        :arg per_box: if *true*, cost model result will be a :class:`numpy.ndarray`
            or :class:`pyopencl.array.Array` with shape of the number of boxes, where
            the ith entry is the sum of the cost of all stages for box i. If *false*,
            cost model result will be a :class:`dict`, mapping from the stage name to
            predicted cost of the stage for all boxes.

        :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns.
        """
        if self.fmm_level_to_order is False:
            raise NotImplementedError("perf modeling direct evaluations")

        def drive_cost_model(
                    wrangler, strengths, geo_data, kernel, kernel_arguments):
            del strengths

            if per_box:
                cost_model_result, metadata = self.cost_model.qbx_cost_per_box(
                    actx.queue, geo_data, kernel, kernel_arguments,
                    calibration_params
                )
            else:
                cost_model_result, metadata = self.cost_model.qbx_cost_per_stage(
                    actx.queue, geo_data, kernel, kernel_arguments,
                    calibration_params
                )

            from pytools.obj_array import obj_array_vectorize
            return (
                    obj_array_vectorize(
                        wrangler.finalize_potentials,
                        wrangler.full_output_zeros()),
                    (cost_model_result, metadata))

        return self._dispatch_compute_potential_insn(
            actx, insn, bound_expr, evaluate,
            self.exec_compute_potential_insn_fmm,
            extra_args={"fmm_driver": drive_cost_model}
        )

    def _dispatch_compute_potential_insn(self, actx, insn, bound_expr,
            evaluate, func, extra_args=None):
        if self._disable_refinement:
            from warnings import warn
            warn(
                    "Executing global QBX without refinement. "
                    "This is unlikely to work.")

        if extra_args is None:
            extra_args = {}

        return func(actx, insn, bound_expr, evaluate, **extra_args)

    # {{{ fmm-based execution

    @memoize_method
    def expansion_wrangler_code_container(self, source_kernels, target_kernels):
        from functools import partial
        base_kernel = single_valued(kernel.get_base_kernel() for
            kernel in source_kernels)
        mpole_expn_class = \
                self.expansion_factory.get_multipole_expansion_class(base_kernel)
        local_expn_class = \
                self.expansion_factory.get_local_expansion_class(base_kernel)

        fmm_mpole_factory = partial(mpole_expn_class, base_kernel)
        fmm_local_factory = partial(local_expn_class, base_kernel)
        qbx_local_factory = partial(local_expn_class, base_kernel)

        if self.fmm_backend == "sumpy":
            from pytential.qbx.fmm import \
                    QBXSumpyExpansionWranglerCodeContainer
            return QBXSumpyExpansionWranglerCodeContainer(
                    self.cl_context,
                    fmm_mpole_factory, fmm_local_factory, qbx_local_factory,
                    target_kernels=target_kernels, source_kernels=source_kernels)

        elif self.fmm_backend == "fmmlib":
            source_kernel, = source_kernels
            target_kernels_new = [
                target_kernel.replace_base_kernel(source_kernel) for
                target_kernel in target_kernels
            ]
            from pytential.qbx.fmmlib import \
                    QBXFMMLibExpansionWranglerCodeContainer
            return QBXFMMLibExpansionWranglerCodeContainer(
                    self.cl_context,
                    fmm_mpole_factory, fmm_local_factory, qbx_local_factory,
                    target_kernels=target_kernels_new)

        else:
            raise ValueError(f"invalid FMM backend: {self.fmm_backend}")

    def get_target_discrs_and_qbx_sides(self, insn, bound_expr):
        """Build the list of unique target discretizations used by the
        provided instruction.
        """
        # map (name, qbx_side) to number in list
        target_name_and_side_to_number = {}
        # list of tuples (discr, qbx_side)
        target_discrs_and_qbx_sides = []

        for o in insn.outputs:
            key = (o.target_name, o.qbx_forced_limit)
            if key not in target_name_and_side_to_number:
                target_name_and_side_to_number[key] = \
                        len(target_discrs_and_qbx_sides)

                target_discr = bound_expr.places.get_discretization(
                        o.target_name.geometry, o.target_name.discr_stage)
                if isinstance(target_discr, LayerPotentialSourceBase):
                    target_discr = target_discr.density_discr

                qbx_forced_limit = o.qbx_forced_limit
                if qbx_forced_limit is None:
                    qbx_forced_limit = 0

                target_discrs_and_qbx_sides.append(
                        (target_discr, qbx_forced_limit))

        return target_name_and_side_to_number, tuple(target_discrs_and_qbx_sides)

    def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
            insn, bound_expr, evaluate, fmm_driver):
        """
        :arg fmm_driver: A function that accepts four arguments:
            *wrangler*, *strength*, *geo_data*, *kernel*, *kernel_arguments*
        :returns: a tuple ``(assignments, extra_outputs)``, where *assignments*
            is a list of tuples containing pairs ``(name, value)`` representing
            assignments to be performed in the evaluation context.
            *extra_outputs* is data that *fmm_driver* may return
            (such as timing data), passed through unmodified.
        """
        target_name_and_side_to_number, target_discrs_and_qbx_sides = (
                self.get_target_discrs_and_qbx_sides(insn, bound_expr))

        geo_data = self.qbx_fmm_geometry_data(
                bound_expr.places,
                insn.source.geometry,
                target_discrs_and_qbx_sides)

        # FIXME Exert more positive control over geo_data attribute lifetimes using
        # geo_data.<method>.clear_cache(geo_data).

        # FIXME Synthesize "bad centers" around corners and edges that have
        # inadequate QBX coverage.

        # FIXME don't compute *all* output kernels on all targets--respect that
        # some target discretizations may only be asking for derivatives (e.g.)

        flat_strengths = _get_flat_strengths_from_densities(
                actx, bound_expr.places, evaluate, insn.densities,
                dofdesc=insn.source)

        base_kernel = single_valued(knl.get_base_kernel() for
            knl in insn.source_kernels)

        output_and_expansion_dtype = (
                self.get_fmm_output_and_expansion_dtype(insn.source_kernels,
                    flat_strengths[0]))
        kernel_extra_kwargs, source_extra_kwargs = (
                self.get_fmm_expansion_wrangler_extra_kwargs(
                    actx, insn.target_kernels + insn.source_kernels,
                    geo_data.tree().user_source_ids,
                    insn.kernel_arguments, evaluate))

        wrangler = self.expansion_wrangler_code_container(
                target_kernels=insn.target_kernels,
                source_kernels=insn.source_kernels).get_wrangler(
                        actx.queue, geo_data, output_and_expansion_dtype,
                        self.qbx_order,
                        self.fmm_level_to_order,
                        source_extra_kwargs=source_extra_kwargs,
                        kernel_extra_kwargs=kernel_extra_kwargs,
                        _use_target_specific_qbx=self._use_target_specific_qbx)

        from pytential.qbx.geometry import target_state
        if (actx.thaw(geo_data.user_target_to_center())
                == target_state.FAILED).any().get():
            raise RuntimeError("geometry has failed targets")

        # {{{ geometry data inspection hook

        if self.geometry_data_inspector is not None:
            perform_fmm = self.geometry_data_inspector(insn, bound_expr, geo_data)
            if not perform_fmm:
                return [(o.name, 0) for o in insn.outputs]

        # }}}

        # Execute global QBX.
        all_potentials_on_every_target, extra_outputs = (
                fmm_driver(
                    wrangler, flat_strengths, geo_data,
                    base_kernel, kernel_extra_kwargs))

        results = []

        for o in insn.outputs:
            target_side_number = target_name_and_side_to_number[
                    o.target_name, o.qbx_forced_limit]
            target_discr, _ = target_discrs_and_qbx_sides[target_side_number]
            target_slice = slice(*geo_data.target_info().target_discr_starts[
                    target_side_number:target_side_number+2])

            result = \
                all_potentials_on_every_target[o.target_kernel_index][target_slice]

            from meshmode.discretization import Discretization
            if isinstance(target_discr, Discretization):
                result = unflatten(actx, target_discr, result)

            results.append((o.name, result))

        return results, extra_outputs

    # }}}

    # {{{ direct execution

    @memoize_method
    def get_expansion_for_qbx_direct_eval(self, base_kernel, target_kernels):
        from sumpy.expansion.local import LineTaylorLocalExpansion
        from sumpy.kernel import TargetDerivativeRemover

        # line Taylor cannot support target derivatives
        txr = TargetDerivativeRemover()
        if any(knl != txr(knl) for knl in target_kernels):
            return self.expansion_factory.get_local_expansion_class(
                    base_kernel)(base_kernel, self.qbx_order)
        else:
            return LineTaylorLocalExpansion(base_kernel, self.qbx_order)

    @memoize_method
    def get_lpot_applier(self, target_kernels, source_kernels):
        # needs to be separate method for caching

        if any(knl.is_complex_valued for knl in target_kernels):
            value_dtype = self.density_discr.complex_dtype
        else:
            value_dtype = self.density_discr.real_dtype

        base_kernel = single_valued(knl.get_base_kernel() for knl in source_kernels)

        from sumpy.qbx import LayerPotential
        return LayerPotential(self.cl_context,
                    expansion=self.get_expansion_for_qbx_direct_eval(
                        base_kernel, target_kernels),
                    target_kernels=target_kernels, source_kernels=source_kernels,
                    value_dtypes=value_dtype)

    @memoize_method
    def get_lpot_applier_on_tgt_subset(self, target_kernels, source_kernels):
        # needs to be separate method for caching

        if any(knl.is_complex_valued for knl in target_kernels):
            value_dtype = self.density_discr.complex_dtype
        else:
            value_dtype = self.density_discr.real_dtype

        base_kernel = single_valued(knl.get_base_kernel() for knl in source_kernels)

        from pytential.qbx.direct import LayerPotentialOnTargetAndCenterSubset
        from sumpy.expansion.local import VolumeTaylorLocalExpansion
        return LayerPotentialOnTargetAndCenterSubset(
                self.cl_context,
                expansion=VolumeTaylorLocalExpansion(base_kernel, self.qbx_order),
                target_kernels=target_kernels, source_kernels=source_kernels,
                value_dtypes=value_dtype)

    @memoize_method
    def get_qbx_target_numberer(self, dtype):
        assert dtype == np.int32
        from pyopencl.scan import GenericScanKernel
        return GenericScanKernel(
                self.cl_context, np.int32,
                arguments="int *tgt_to_qbx_center, int *qbx_tgt_number, int *count",
                input_expr="tgt_to_qbx_center[i] >= 0 ? 1 : 0",
                scan_expr="a+b", neutral="0",
                output_statement="""
                    if (item != prev_item)
                        qbx_tgt_number[item-1] = i;

                    if (i+1 == N)
                        *count = item;
                    """)

    def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate,
            return_timing_data):
        from pytential import bind, sym
        from meshmode.discretization import Discretization

        if return_timing_data:
            from pytential.source import UnableToCollectTimingData
            from warnings import warn
            warn(
                    "Timing data collection not supported.",
                    category=UnableToCollectTimingData)

        # {{{ evaluate and flatten inputs

        @memoize_in(bound_expr.places,
                (QBXLayerPotentialSource, "flat_nodes"))
        def _flat_nodes(dofdesc):
            discr = bound_expr.places.get_discretization(
                    dofdesc.geometry, dofdesc.discr_stage)
            return freeze(flatten(thaw(discr.nodes(), actx), strict=False), actx)

        @memoize_in(bound_expr.places,
                (QBXLayerPotentialSource, "flat_expansion_radii"))
        def _flat_expansion_radii(dofdesc):
            radii = bind(
                    bound_expr.places,
                    sym.expansion_radii(self.ambient_dim, dofdesc=dofdesc),
                    )(actx)
            return freeze(flatten(radii), actx)

        @memoize_in(bound_expr.places,
                (QBXLayerPotentialSource, "flat_centers"))
        def _flat_centers(dofdesc, qbx_forced_limit):
            centers = bind(bound_expr.places,
                    sym.expansion_centers(
                        self.ambient_dim, qbx_forced_limit, dofdesc=dofdesc),
                    )(actx)
            return freeze(flatten(centers), actx)

        kernel_args = {}
        for arg_name, arg_expr in insn.kernel_arguments.items():
            kernel_args[arg_name] = flatten(evaluate(arg_expr), strict=False)

        flat_strengths = _get_flat_strengths_from_densities(
                actx, bound_expr.places, evaluate, insn.densities,
                dofdesc=insn.source)

        flat_source_nodes = _flat_nodes(insn.source)

        # }}}

        # {{{ partition interactions in target kernels

        from collections import defaultdict
        self_outputs = defaultdict(list)
        other_outputs = defaultdict(list)

        for i, o in enumerate(insn.outputs):
            # For purposes of figuring out whether this is a self-interaction,
            # disregard discr_stage.
            source_dd = insn.source.copy(discr_stage=o.target_name.discr_stage)

            target_discr = bound_expr.places.get_discretization(
                    o.target_name.geometry, o.target_name.discr_stage)
            density_discr = bound_expr.places.get_discretization(
                    source_dd.geometry, source_dd.discr_stage)

            if target_discr is density_discr:
                # NOTE: QBXPreprocessor is supposed to have taken care of this
                assert o.qbx_forced_limit is not None
                assert abs(o.qbx_forced_limit) > 0

                self_outputs[(o.target_name, o.qbx_forced_limit)].append((i, o))
            else:
                qbx_forced_limit = o.qbx_forced_limit
                if qbx_forced_limit is None:
                    qbx_forced_limit = 0

                other_outputs[(o.target_name, qbx_forced_limit)].append((i, o))

        queue = actx.queue
        results = [None] * len(insn.outputs)

        # }}}

        # {{{ self interactions

        # FIXME: Do this all at once

        lpot_applier = self.get_lpot_applier(
                insn.target_kernels, insn.source_kernels)

        for (target_name, qbx_forced_limit), outputs in self_outputs.items():
            target_discr = bound_expr.places.get_discretization(
                    target_name.geometry, target_name.discr_stage)
            flat_target_nodes = _flat_nodes(target_name)

            evt, output_for_each_kernel = lpot_applier(queue,
                    targets=flat_target_nodes,
                    sources=flat_source_nodes,
                    centers=_flat_centers(target_name, qbx_forced_limit),
                    strengths=flat_strengths,
                    expansion_radii=_flat_expansion_radii(target_name),
                    **kernel_args)

            for i, o in outputs:
                result = output_for_each_kernel[o.target_kernel_index]
                if isinstance(target_discr, Discretization):
                    result = unflatten(actx, target_discr, result)

                results[i] = (o.name, result)

        # }}}

        # {{{ off-surface interactions

        if other_outputs:
            p2p = self.get_p2p(actx, insn.target_kernels, insn.source_kernels)
            lpot_applier_on_tgt_subset = self.get_lpot_applier_on_tgt_subset(
                    insn.target_kernels, insn.source_kernels)

        for (target_name, qbx_forced_limit), outputs in other_outputs.items():
            target_discr = bound_expr.places.get_discretization(
                    target_name.geometry, target_name.discr_stage)
            flat_target_nodes = _flat_nodes(target_name)

            # FIXME: (Somewhat wastefully) compute P2P for all targets
            evt, output_for_each_kernel = p2p(queue,
                    targets=flat_target_nodes,
                    sources=flat_source_nodes,
                    strength=flat_strengths,
                    **kernel_args)

            target_discrs_and_qbx_sides = ((target_discr, qbx_forced_limit),)
            geo_data = self.qbx_fmm_geometry_data(
                    bound_expr.places,
                    insn.source.geometry,
                    target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

            # center-related info is independent of targets

            # First ncenters targets are the centers
            tgt_to_qbx_center = (
                    geo_data.user_target_to_center()[geo_data.ncenters:]
                    .copy(queue=queue)
                    .with_queue(queue))

            qbx_tgt_numberer = self.get_qbx_target_numberer(
                    tgt_to_qbx_center.dtype)
            qbx_tgt_count = cl.array.empty(queue, (), np.int32)
            qbx_tgt_numbers = cl.array.empty_like(tgt_to_qbx_center)

            qbx_tgt_numberer(
                    tgt_to_qbx_center, qbx_tgt_numbers, qbx_tgt_count,
                    queue=queue)

            qbx_tgt_count = int(qbx_tgt_count.get())
            if (abs(qbx_forced_limit) == 1 and qbx_tgt_count < target_discr.ndofs):
                raise RuntimeError(
                        "Did not find a matching QBX center for some targets")

            qbx_tgt_numbers = qbx_tgt_numbers[:qbx_tgt_count]
            qbx_center_numbers = tgt_to_qbx_center[qbx_tgt_numbers]
            qbx_center_numbers.finish()

            tgt_subset_kwargs = kernel_args.copy()
            for i, res_i in enumerate(output_for_each_kernel):
                tgt_subset_kwargs[f"result_{i}"] = res_i

            if qbx_tgt_count:
                lpot_applier_on_tgt_subset(
                        queue,
                        targets=flat_target_nodes,
                        sources=flat_source_nodes,
                        centers=geo_data.flat_centers(),
                        expansion_radii=geo_data.flat_expansion_radii(),
                        strengths=flat_strengths,
                        qbx_tgt_numbers=qbx_tgt_numbers,
                        qbx_center_numbers=qbx_center_numbers,
                        **tgt_subset_kwargs)

            for i, o in outputs:
                result = output_for_each_kernel[o.target_kernel_index]
                if isinstance(target_discr, Discretization):
                    result = unflatten(actx, target_discr, result)

                results[i] = (o.name, result)

        # }}}

        timing_data = {}
        return results, timing_data

    # }}}

    # }}}


def _get_flat_strengths_from_densities(
        actx, places, evaluate, densities, dofdesc=None):
    from pytential import bind, sym
    waa = bind(
            places,
            sym.weights_and_area_elements(places.ambient_dim, dofdesc=dofdesc),
            )(actx)
    strength_vecs = [waa * evaluate(density) for density in densities]

    from meshmode.dof_array import flatten
    return [flatten(strength) for strength in strength_vecs]

# }}}


__all__ = (
        "QBXLayerPotentialSource",
        "QBXTargetAssociationFailedException",
        )

# vim: fdm=marker