Package hedge :: Module tools
Source Code for Module hedge.tools

   1  """Miscellaneous helper facilities.""" 
   2   
   3  from __future__ import division 
   4   
   5  __copyright__ = "Copyright (C) 2007 Andreas Kloeckner" 
   6   
   7  __license__ = """ 
   8  This program is free software: you can redistribute it and/or modify 
   9  it under the terms of the GNU General Public License as published by 
  10  the Free Software Foundation, either version 3 of the License, or 
  11  (at your option) any later version. 
  12   
  13  This program is distributed in the hope that it will be useful, 
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  GNU General Public License for more details. 
  17   
  18  You should have received a copy of the GNU General Public License 
  19  along with this program.  If not, see U{http://www.gnu.org/licenses/}. 
  20  """ 
  21   
  22   
  23   
  24   
  25   
  26  import numpy 
  27  import numpy.linalg as la 
  28  import pyublas 
  29  import hedge._internal 
  30   
  31   
  32   
  33  ZeroVector = hedge._internal.ZeroVector 
  34  cyl_bessel_j = hedge._internal.cyl_bessel_j 
  35  cyl_neumann = hedge._internal.cyl_neumann 
  36   
  37   
  38   
  39   
  40 -def is_zero(x): 
  41      return isinstance(x, (int, float)) and x == 0 
  42   
  43   
  44   
  45   
  46 -def relative_error(norm_diff, norm_true): 
  47      if norm_true == 0: 
  48          if norm_diff == 0: 
  49              return 0 
  50          else: 
  51              return float("inf") 
  52      else: 
  53          return norm_diff/norm_true 
  54   
  55   
  56   
  57   
  58 -def has_data_in_numpy_arrays(a, allow_objarray_levels): 
  59      if is_obj_array(a) and allow_objarray_levels > 0: 
  60          from pytools import indices_in_shape, all 
  61          return all( 
  62                  has_data_in_numpy_arrays( 
  63                      a[i], allow_objarray_levels=allow_objarray_levels-1) 
  64                  for i in indices_in_shape(a.shape)) 
  65      else: 
  66          return isinstance(a, numpy.ndarray) and a.dtype != object 
  67   
  68   
  69   
  70   
  71 -def numpy_linear_comb(lin_comb): 
  72      assert lin_comb 
  73   
  74      scalar_dtypes = tuple(numpy.array(fac).dtype for fac, ary in lin_comb) 
  75   
  76      from pytools import single_valued, indices_in_shape, flatten, \ 
  77              match_precision 
  78      from codepy.elementwise import make_linear_comb_kernel 
  79   
  80      if single_valued(is_obj_array(ary) for fac, ary in lin_comb): 
  81          oa_shape = single_valued(ary.shape for fac, ary in lin_comb) 
  82          result = numpy.zeros(oa_shape, dtype=object) 
  83          for i in indices_in_shape(oa_shape): 
  84              el_shape = single_valued(ary[i].shape for fac, ary in lin_comb) 
  85   
  86              vector_dtypes = tuple(ary[i].dtype for fac, ary in lin_comb) 
  87              scalar_dtypes = tuple( 
  88                      match_precision(sd, vd) 
  89                      for sd, vd in zip(scalar_dtypes, vector_dtypes)) 
  90   
  91              kernel, result_dtype = make_linear_comb_kernel( 
  92                      scalar_dtypes, vector_dtypes) 
  93   
  94              result[i] = numpy.zeros(el_shape, result_dtype) 
  95              kernel(result[i], *tuple(flatten((fac, ary[i]) for fac, ary in lin_comb))) 
  96   
  97          return result 
  98      else: 
  99          shape = single_valued(ary.shape for fac, ary in lin_comb) 
 100          vector_dtypes = tuple(ary.dtype for fac, ary in lin_comb) 
 101          scalar_dtypes = tuple( 
 102                  match_precision(sd, vd) 
 103                  for sd, vd in zip(scalar_dtypes, vector_dtypes)) 
 104   
 105          kernel, result_dtype = make_linear_comb_kernel( 
 106                  scalar_dtypes, vector_dtypes) 
 107   
 108          result = numpy.zeros(shape, result_dtype) 
 109          kernel(result, *tuple(flatten(lin_comb))) 
 110          return result 
 111   
 112   
 113   
 114   
 115 -def mul_add(afac, a, bfac, b, add_timer=None): 
 116      if is_obj_array(a): 
 117          return numpy.array([ 
 118              mul_add(afac, a_i, bfac, b_i, add_timer=add_timer) 
 119              for a_i, b_i in zip(a, b)], 
 120              dtype=object) 
 121      else: 
 122          return a.mul_add(afac, b, bfac, add_timer=add_timer) 
 123   
 124   
 125   
 126   
 127 -def cyl_bessel_j_prime(nu, z): 
 128      if nu == 0: 
 129          if z == 0: 
 130              return 0 
 131          else: 
 132              return -cyl_bessel_j(nu+1, z)+nu/z*cyl_bessel_j(nu, z) 
 133      else: 
 134          return 0.5*(cyl_bessel_j(nu-1, z)-cyl_bessel_j(nu+1, z)) 
 135   
 136   
 137   
 138   
 139  AffineMap = hedge._internal.AffineMap 
 140 -def _affine_map___getinitargs__(self): 
 141      return self.matrix, self.vector 
 142   
 143  AffineMap.__getinitargs__ = _affine_map___getinitargs__ 
 144   
 145   
 146   
 147   
 148 -class Rotation(AffineMap): 
 149 -    def __init__(self, angle): 
 150          # FIXME: Add axis, make multidimensional 
 151          from math import sin, cos 
 152          AffineMap.__init__(self, 
 153                  numpy.array([ 
 154                      [cos(angle), sin(angle)], 
 155                      [-sin(angle), cos(angle)]]), 
 156                  numpy.zeros((2,))) 
 157   
 158   
 159   
 160   
 161 -class Reflection(AffineMap): 
 162 -    def __init__(self, axis, dim): 
 163          mat = numpy.identity(dim) 
 164          mat[axis,axis] = -1 
 165          AffineMap.__init__(self, mat, numpy.zeros((dim,))) 
 166   
 167   
 168   
 169   
 170 -def plot_1d(f, a, b, steps=100, driver=None): 
 171      h = (b - a)/steps 
 172   
 173      points = [] 
 174      data = [] 
 175      for n in range(steps): 
 176          x = a + h * n 
 177          points.append(x) 
 178          data.append(f(x)) 
 179   
 180      # autodetect driver 
 181      if driver is None: 
 182          try: 
 183              import pylab 
 184              driver = "matplotlib" 
 185          except ImportError: 
 186              pass 
 187      if driver is None: 
 188          try: 
 189              import Gnuplot 
 190              driver = "gnuplot" 
 191          except ImportError: 
 192              pass 
 193   
 194      # actually plot 
 195      if driver == "matplotlib": 
 196          from pylab import plot, show 
 197          plot(points, data) 
 198          show() 
 199      elif driver == "gnuplot": 
 200          from Gnuplot import Gnuplot, Data 
 201          gp = Gnuplot() 
 202          gp.plot(Data(points, data)) 
 203          raw_input() 
 204      else: 
 205          raise ValueError, "invalid plot driver '%s'" % driver 
 206   
 207   
 208   
 209   
 210  # obj array helpers ----------------------------------------------------------- 
 211 -def is_obj_array(val): 
 212      try: 
 213          return val.dtype == object 
 214      except AttributeError: 
 215          return False 
 216   
 217   
 218   
 219   
 220 -def to_obj_array(ary): 
 221      ls = log_shape(ary) 
 222      result = numpy.empty(ls, dtype=object) 
 223   
 224      from pytools import indices_in_shape 
 225      for i in indices_in_shape(ls): 
 226          result[i] = ary[i] 
 227   
 228      return result 
 229   
 230   
 231   
 232   
 233 -def is_field_equal(a, b): 
 234      if is_obj_array(a): 
 235          return is_obj_array(b) and (a == b).all() 
 236      else: 
 237          return not is_obj_array(b) and a == b 
 238   
 239   
 240   
 241   
 242 -def make_obj_array(res_list): 
 243      result = numpy.empty((len(res_list),), dtype=object) 
 244      for i, v in enumerate(res_list): 
 245          result[i] = v 
 246   
 247      return result 
 248   
 249   
 250   
 251   
 252 -def setify_field(f): 
 253      from hedge.tools import is_obj_array 
 254      if is_obj_array(f): 
 255          return set(f) 
 256      else: 
 257          return set([f]) 
 258   
 259   
 260   
 261   
 262 -def hashable_field(f): 
 263      if is_obj_array(f): 
 264          return tuple(f) 
 265      else: 
 266          return f 
 267   
 268   
 269   
 270   
 271 -def field_equal(a, b): 
 272      a_is_oa = is_obj_array(a) 
 273      assert a_is_oa == is_obj_array(b) 
 274   
 275      if a_is_oa: 
 276          return (a == b).all() 
 277      else: 
 278          return a == b 
 279   
 280   
 281   
 282   
 283 -def join_fields(*args): 
 284      res_list = [] 
 285      for arg in args: 
 286          if isinstance(arg, list): 
 287              res_list.extend(arg) 
 288          elif isinstance(arg, numpy.ndarray): 
 289              if log_shape(arg) == (): 
 290                  res_list.append(arg) 
 291              else: 
 292                  res_list.extend(arg) 
 293          else: 
 294              res_list.append(arg) 
 295   
 296      return make_obj_array(res_list) 
 297   
 298   
 299   
 300   
 301 -def log_shape(array): 
 302      """Returns the "logical shape" of the array. 
 303   
 304      The "logical shape" is the shape that's left when the node-depending 
 305      dimension has been eliminated.""" 
 306   
 307      try: 
 308          if array.dtype.char == "O": 
 309              return array.shape 
 310          else: 
 311              return array.shape[:-1] 
 312      except AttributeError: 
 313          return () 
 314   
 315   
 316   
 317   
 318 -def with_object_array_or_scalar(f, field): 
 319      ls = log_shape(field) 
 320      if ls != (): 
 321          from pytools import indices_in_shape 
 322          result = numpy.zeros(ls, dtype=object) 
 323          for i in indices_in_shape(ls): 
 324              result[i] = f(field[i]) 
 325          return result 
 326      else: 
 327          return f(field) 
 328   
 329   
 330   
 331   
 332 -def ptwise_mul(a, b): 
 333      a_log_shape = log_shape(a) 
 334      b_log_shape = log_shape(b) 
 335   
 336      from pytools import indices_in_shape 
 337   
 338      if a_log_shape == (): 
 339          result = numpy.empty(b_log_shape, dtype=object) 
 340          for i in indices_in_shape(b_log_shape): 
 341              result[i] = a*b[i] 
 342      elif b_log_shape == (): 
 343          result = numpy.empty(a_log_shape, dtype=object) 
 344          for i in indices_in_shape(a_log_shape): 
 345              result[i] = a[i]*b 
 346      else: 
 347          raise ValueError, "ptwise_mul can't handle two non-scalars" 
 348   
 349      return result 
 350   
 351   
 352   
 353   
 354 -def ptwise_dot(logdims1, logdims2, a1, a2): 
 355      a1_log_shape = a1.shape[:logdims1] 
 356      a2_log_shape = a1.shape[:logdims2] 
 357   
 358      assert a1_log_shape[-1] == a2_log_shape[0] 
 359      len_k = a2_log_shape[0] 
 360   
 361      result = numpy.empty(a1_log_shape[:-1]+a2_log_shape[1:], dtype=object) 
 362   
 363      from pytools import indices_in_shape 
 364      for a1_i in indices_in_shape(a1_log_shape[:-1]): 
 365          for a2_i in indices_in_shape(a2_log_shape[1:]): 
 366              result[a1_i+a2_i] = sum( 
 367                      a1[a1_i+(k,)] * a2[(k,)+a2_i] 
 368                      for k in xrange(len_k) 
 369                      ) 
 370   
 371      if result.shape == (): 
 372          return result[()] 
 373      else: 
 374          return result 
 375   
 376   
 377   
 378   
 379 -def levi_civita(tuple): 
 380      """Compute an entry of the Levi-Civita tensor for the indices C{tuple}. 
 381   
 382      Only three-tuples are supported for now. 
 383      """ 
 384      if len(tuple) == 3: 
 385          if tuple in [(0,1,2), (2,0,1), (1,2,0)]: 
 386              return 1 
 387          elif tuple in [(2,1,0), (0,2,1), (1,0,2)]: 
 388              return -1 
 389          else: 
 390              return 0 
 391      else: 
 392          raise NotImplementedError 
 393   
 394   
 395   
 396   
 397 -def count_subset(subset): 
 398      from pytools import len_iterable 
 399      return len_iterable(uc for uc in subset if uc) 
 400   
 401   
 402   
 403   
 404 -def full_to_subset_indices(subset, base=0): 
 405      """Takes a sequence of bools and turns it into an array of indices 
 406      to be used to extract the subset from the full set. 
 407   
 408      Example: 
 409   
 410      >>> full_to_subset_indices([False, True, True]) 
 411      array([1 2]) 
 412      """ 
 413   
 414      result = [] 
 415      for i, is_in in enumerate(subset): 
 416          if is_in: 
 417              result.append(i + base) 
 418   
 419      return numpy.array(result, dtype=numpy.intp) 
 420   
 421   
 422   
 423 -def full_to_all_subset_indices(subsets, base=0): 
 424      """Takes a sequence of bools and generates it into an array of indices 
 425      to be used to extract the subset from the full set. 
 426   
 427      Example: 
 428   
 429      >>> list(full_to_all_subset_indices([[False, True, True], [True,False,True]])) 
 430      [array([1 2]), array([3 5] 
 431      """ 
 432   
 433      for subset in subsets: 
 434          result = [] 
 435          for i, is_in in enumerate(subset): 
 436              if is_in: 
 437                  result.append(i + base) 
 438          base += len(subset) 
 439   
 440          yield numpy.array(result, dtype=numpy.intp) 
 441   
 442   
 443   
 444 -def partial_to_all_subset_indices(subsets, base=0): 
 445      """Takes a sequence of bools and generates it into an array of indices 
 446      to be used to insert the subset into the full set. 
 447   
 448      Example: 
 449   
 450      >>> list(partial_to_all_subset_indices([[False, True, True], [True,False,True]])) 
 451      [array([0 1]), array([2 3] 
 452      """ 
 453   
 454      idx = base 
 455      for subset in subsets: 
 456          result = [] 
 457          for is_in in subset: 
 458              if is_in: 
 459                  result.append(idx) 
 460                  idx += 1 
 461   
 462          yield numpy.array(result, dtype=numpy.intp) 
 463   
 464   
 465   
 466 -class SubsettableCrossProduct: 
 467      """A cross product that can operate on an arbitrary subsets of its 
 468      two operands and return an arbitrary subset of its result. 
 469      """ 
 470   
 471      full_subset = (True, True, True) 
 472   
 473 -    def __init__(self, op1_subset=full_subset, op2_subset=full_subset, result_subset=full_subset): 
 474          """Construct a subset-able cross product. 
 475   
 476          @arg op1_subset: The subset of indices of operand 1 to be taken into account. 
 477            Given as a 3-sequence of bools. 
 478          @arg op2_subset: The subset of indices of operand 2 to be taken into account. 
 479            Given as a 3-sequence of bools. 
 480          @arg result_subset: The subset of indices of the result that are calculated. 
 481            Given as a 3-sequence of bools. 
 482          """ 
 483          def subset_indices(subset): 
 484              return [i for i, use_component in enumerate(subset) 
 485                      if use_component] 
 486   
 487          self.op1_subset = op1_subset 
 488          self.op2_subset = op2_subset 
 489          self.result_subset = result_subset 
 490   
 491          import pymbolic 
 492          op1 = pymbolic.var("x") 
 493          op2 = pymbolic.var("y") 
 494   
 495          self.functions = [] 
 496          self.component_lcjk = [] 
 497          for i, use_component in enumerate(result_subset): 
 498              if use_component: 
 499                  this_expr = 0 
 500                  this_component = [] 
 501                  for j, j_real in enumerate(subset_indices(op1_subset)): 
 502                      for k, k_real in enumerate(subset_indices(op2_subset)): 
 503                          lc = levi_civita((i, j_real, k_real)) 
 504                          if lc != 0: 
 505                              this_expr += lc*op1[j]*op2[k] 
 506                              this_component.append((lc, j, k)) 
 507                  self.functions.append(pymbolic.compile(this_expr, 
 508                      variables=[op1, op2])) 
 509                  self.component_lcjk.append(this_component) 
 510   
 511 -    def __call__(self, x, y, three_mult=None): 
 512          """Compute the subsetted cross product on the indexables C{x} and C{y}. 
 513   
 514          @arg three_mult: a function of three arguments C{sign, xj, yk} 
 515            used in place of the product C{sign*xj*yk}. Defaults to just this 
 516            product if not given. 
 517          """ 
 518          if three_mult is None: 
 519              return join_fields(*[f(x, y) for f in self.functions]) 
 520          else: 
 521              return join_fields( 
 522                      *[sum(three_mult(lc, x[j], y[k]) for lc, j, k in lcjk) 
 523                      for lcjk in self.component_lcjk]) 
 524   
 525   
 526   
 527   
 528  cross = SubsettableCrossProduct() 
 529   
 530   
 531   
 532   
 533 -def normalize(v): 
 534      return v/numpy.linalg.norm(v) 
 535   
 536   
 537   
 538   
 539 -def sign(x): 
 540      if x > 0: 
 541          return 1 
 542      elif x == 0: 
 543          return 0 
 544      else: 
 545          return -1 
 546   
 547   
 548   
 549   
 550 -def find_matching_vertices_along_axis(axis, points_a, points_b, numbers_a, numbers_b): 
 551      a_to_b = {} 
 552      not_found = [] 
 553   
 554      for i, pi in enumerate(points_a): 
 555          found = False 
 556          for j, pj in enumerate(points_b): 
 557              dist = pi-pj 
 558              dist[axis] = 0 
 559              if la.norm(dist) < 1e-12: 
 560                  a_to_b[numbers_a[i]] = numbers_b[j] 
 561                  found = True 
 562                  break 
 563          if not found: 
 564              not_found.append(numbers_a[i]) 
 565   
 566      return a_to_b, not_found 
 567   
 568   
 569   
 570   
 571  # linear algebra tools -------------------------------------------------------- 
 572 -def orthonormalize(vectors, discard_threshold=None): 
 573      """Carry out a modified [1] Gram-Schmidt orthonormalization on 
 574      vectors. 
 575   
 576      If, during orthonormalization, the 2-norm of a vector drops 
 577      below C{discard_threshold}, then this vector is silently 
 578      discarded. If C{discard_threshold} is C{None}, then no vector 
 579      will ever be dropped, and a zero 2-norm encountered during 
 580      orthonormalization will throw a C{RuntimeError}. 
 581   
 582      [1] U{http://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process} 
 583      """ 
 584   
 585      from numpy import dot 
 586      done_vectors = [] 
 587   
 588      for v in vectors: 
 589          my_v = v.copy() 
 590          for done_v in done_vectors: 
 591              my_v = my_v - dot(my_v, done_v.conjugate()) * done_v 
 592          v_norm = la.norm(my_v) 
 593   
 594          if discard_threshold is None: 
 595              if v_norm == 0: 
 596                  raise RuntimeError, "Orthogonalization failed" 
 597          else: 
 598              if v_norm < discard_threshold: 
 599                  continue 
 600   
 601          my_v /= v_norm 
 602          done_vectors.append(my_v) 
 603   
 604      return done_vectors 
 605   
 606   
 607   
 608   
 609 -def permutation_matrix(to_indices=None, from_indices=None, h=None, w=None, 
 610          dtype=None, flavor=None): 
 611      """Return a permutation matrix. 
 612   
 613      If to_indices is specified, the resulting permutation 
 614      matrix P satisfies the condition 
 615   
 616      P * e[i] = e[to_indices[i]] for i=1,...,len(to_indices) 
 617   
 618      where e[i] is the i-th unit vector. The height of P is 
 619      determined either implicitly by the maximum of to_indices 
 620      or explicitly by the parameter h. 
 621   
 622      If from_indices is specified, the resulting permutation 
 623      matrix P satisfies the condition 
 624   
 625      P * e[from_indices[i]] = e[i] for i=1,...,len(from_indices) 
 626   
 627      where e[i] is the i-th unit vector. The width of P is 
 628      determined either implicitly by the maximum of from_indices 
 629      of explicitly by the parameter w. 
 630   
 631      If both to_indices and from_indices is specified, a ValueError 
 632      exception is raised. 
 633      """ 
 634      if to_indices is not None and from_indices is not None: 
 635          raise ValueError, "only one of to_indices and from_indices may " \ 
 636                  "be specified" 
 637   
 638      if to_indices is not None: 
 639          if h is None: 
 640              h = max(to_indices)+1 
 641          w = len(to_indices) 
 642      else: 
 643          if w is None: 
 644              w = max(from_indices)+1 
 645          h = len(from_indices) 
 646   
 647      if flavor is None: 
 648          result = numpy.zeros((h,w), dtype=dtype) 
 649   
 650          if to_indices is not None: 
 651              for j, i in enumerate(to_indices): 
 652                  result[i,j] = 1 
 653          else: 
 654              for i, j in enumerate(from_indices): 
 655                  result[i,j] = 1 
 656      else: 
 657          result = pyublas.zeros((h,w), dtype=dtype, flavor=flavor) 
 658   
 659          if to_indices is not None: 
 660              for j, i in enumerate(to_indices): 
 661                  result.add_element(i, j, 1) 
 662          else: 
 663              for i, j in enumerate(from_indices): 
 664                  result.add_element(i, j, 1) 
 665   
 666      return result 
 667   
 668   
 669   
 670   
 671 -def leftsolve(A, B): 
 672      return la.solve(A.T, B.T).T 
 673   
 674   
 675   
 676   
 677 -def unit_vector(n, i, dtype=None): 
 678      """Return the i-th unit vector of size n, with the given dtype.""" 
 679      result = numpy.zeros((n,), dtype=dtype) 
 680      result[i] = 1 
 681      return result 
 682   
 683   
 684   
 685   
 686  # eoc estimation -------------------------------------------------------------- 
 687 -def estimate_order_of_convergence(abscissae, errors): 
 688      """Assuming that abscissae and errors are connected by a law of the form 
 689   
 690      error = constant * abscissa ^ (-order), 
 691   
 692      this function finds, in a least-squares sense, the best approximation of 
 693      constant and order for the given data set. It returns a tuple (constant, order). 
 694      Both inputs must be PyLinear vectors. 
 695      """ 
 696      assert len(abscissae) == len(errors) 
 697      if len(abscissae) <= 1: 
 698          raise RuntimeError, "Need more than one value to guess order of convergence." 
 699   
 700      coefficients = numpy.polyfit(numpy.log10(abscissae), numpy.log10(errors), 1) 
 701      return 10**coefficients[-1], -coefficients[-2] 
 702   
 703   
 704   
 705   
 706 -class EOCRecorder(object): 
 707 -    def __init__(self): 
 708          self.history = [] 
 709   
 710 -    def add_data_point(self, abscissa, error): 
 711          self.history.append((abscissa, error)) 
 712   
 713 -    def estimate_order_of_convergence(self, gliding_mean = None): 
 714          abscissae = numpy.array([ a for a,e in self.history ]) 
 715          errors = numpy.array([ e for a,e in self.history ]) 
 716   
 717          size = len(abscissae) 
 718          if gliding_mean is None: 
 719              gliding_mean = size 
 720   
 721          data_points = size - gliding_mean + 1 
 722          result = numpy.zeros((data_points, 2), float) 
 723          for i in range(data_points): 
 724              result[i,0], result[i,1] = estimate_order_of_convergence( 
 725                  abscissae[i:i+gliding_mean], errors[i:i+gliding_mean]) 
 726          return result 
 727   
 728 -    def pretty_print(self, abscissa_label="N", error_label="Error", gliding_mean=2): 
 729          from pytools import Table 
 730   
 731          tbl = Table() 
 732          tbl.add_row((abscissa_label, error_label, "Running EOC")) 
 733   
 734          gm_eoc = self.estimate_order_of_convergence(gliding_mean) 
 735          for i, (absc, err) in enumerate(self.history): 
 736              if i < gliding_mean-1: 
 737                  tbl.add_row((str(absc), str(err), "")) 
 738              else: 
 739                  tbl.add_row((str(absc), str(err), str(gm_eoc[i-gliding_mean+1,1]))) 
 740   
 741          if len(self.history) > 1: 
 742              return str(tbl) + "\n\nOverall EOC: %s" % self.estimate_order_of_convergence()[0,1] 
 743          else: 
 744              return str(tbl) 
 745   
 746 -    def write_gnuplot_file(self, filename): 
 747          outfile = file(filename, "w") 
 748          for absc, err in self.history: 
 749              outfile.write("%f %f\n" % (absc, err)) 
 750          result = self.estimate_order_of_convergence() 
 751          const = result[0,0] 
 752          order = result[0,1] 
 753          outfile.write("\n") 
 754          for absc, err in self.history: 
 755              outfile.write("%f %f\n" % (absc, const * absc**(-order))) 
 756   
 757   
 758   
 759   
 760  # small utilities ------------------------------------------------------------- 
 761 -class Closable(object): 
 762 -    def __init__(self): 
 763          self.is_closed = False 
 764   
 765 -    def __enter__(self): 
 766          return self 
 767   
 768 -    def __exit__(self, exc_type, exc_value, traceback): 
 769          self.close() 
 770   
 771 -    def close(self): 
 772          if not self.is_closed: 
 773              # even if close attempt fails, consider ourselves closed still 
 774              try: 
 775                  self.do_close() 
 776              finally: 
 777                  self.is_closed = True 
 778   
 779   
 780   
 781   
 782 -def mem_checkpoint(name=None): 
 783      """Invoke the garbage collector and wait for a keypress.""" 
 784   
 785      import gc 
 786      gc.collect() 
 787      if name: 
 788          raw_input("%s -- hit Enter:" % name) 
 789      else: 
 790          raw_input("Enter:") 
 791   
 792   
 793   
 794   
 795   
 796  # index map tools ------------------------------------------------------------- 
 797 -def apply_index_map(imap, vector): 
 798      from hedge._internal import VectorTarget, perform_index_map 
 799   
 800      ls = log_shape(vector) 
 801      if ls == (): 
 802          result = numpy.zeros(shape=(imap.to_length,), dtype=float) 
 803          perform_index_map(imap, VectorTarget(vector, result)) 
 804      else: 
 805          result = numpy.zeros(shape=ls+(imap.to_length,), dtype=float) 
 806          from pytools import indices_in_shape 
 807          for i in indices_in_shape(ls): 
 808              perform_index_map(imap, VectorTarget(vector[i], result[i])) 
 809      return result 
 810   
 811   
 812   
 813   
 814 -def apply_inverse_index_map(imap, vector): 
 815      from hedge._internal import VectorTarget, perform_inverse_index_map 
 816   
 817      ls = log_shape(vector) 
 818      if ls == (): 
 819          result = numpy.zeros(shape=(imap.from_length,), dtype=float) 
 820          perform_inverse_index_map(imap, VectorTarget(vector, result)) 
 821      else: 
 822          result = numpy.zeros(shape=ls+(imap.from_length,), dtype=float) 
 823          from pytools import indices_in_shape 
 824          for i in indices_in_shape(ls): 
 825              perform_inverse_index_map(imap, VectorTarget(vector[i], result[i])) 
 826      return result 
 827   
 828   
 829   
 830   
 831  # mesh reorderings ------------------------------------------------------------ 
 832 -def cuthill_mckee(graph): 
 833      """Return a Cuthill-McKee ordering for the given graph. 
 834   
 835      See (for example) 
 836      Y. Saad, Iterative Methods for Sparse Linear System, 
 837      2nd edition, p. 76. 
 838   
 839      `graph' is given as an adjacency mapping, i.e. each node is 
 840      mapped to a list of its neighbors. 
 841      """ 
 842      from pytools import argmin 
 843   
 844      # this list is called "old_numbers" because it maps a 
 845      # "new number to its "old number" 
 846      old_numbers = [] 
 847      visited_nodes = set() 
 848      levelset = [] 
 849   
 850      all_nodes = set(graph.keys()) 
 851   
 852      def levelset_cmp(node_a, node_b): 
 853          return cmp(len(graph[node_a]), len(graph[node_b])) 
 854   
 855      while len(old_numbers) < len(graph): 
 856          if not levelset: 
 857              unvisited = list(set(graph.keys()) - visited_nodes) 
 858   
 859              if not unvisited: 
 860                  break 
 861   
 862              start_node = unvisited[ 
 863                      argmin(len(graph[node]) for node in unvisited)] 
 864              visited_nodes.add(start_node) 
 865              old_numbers.append(start_node) 
 866              levelset = [start_node] 
 867   
 868          next_levelset = set() 
 869          levelset.sort(levelset_cmp) 
 870   
 871          for node in levelset: 
 872              for neighbor in graph[node]: 
 873                  if neighbor in visited_nodes: 
 874                      continue 
 875   
 876                  visited_nodes.add(neighbor) 
 877                  next_levelset.add(neighbor) 
 878                  old_numbers.append(neighbor) 
 879   
 880          levelset = list(next_levelset) 
 881   
 882      return old_numbers 
 883   
 884   
 885   
 886   
 887 -def reverse_lookup_table(lut): 
 888      result = [None] * len(lut) 
 889      for key, value in enumerate(lut): 
 890          result[value] = key 
 891      return result 
 892   
 893   
 894   
 895   
 896  # block matrix ---------------------------------------------------------------- 
 897 -class BlockMatrix(object): 
 898      """A block matrix is the sum of different smaller 
 899      matrices positioned within one big matrix. 
 900      """ 
 901   
 902 -    def __init__(self, chunks): 
 903          """Return a new block matrix made up of components (`chunks') 
 904          given as triples (i,j,smaller_matrix), where the top left (0,0) 
 905          corner of the smaller_matrix is taken to be at position (i,j). 
 906   
 907          smaller_matrix may be anything that can be left-multiplied to 
 908          a Pylinear vector, including BlockMatrix instances. 
 909          """ 
 910          self.chunks = [] 
 911          for i, j, chunk in chunks: 
 912              if isinstance(chunk, BlockMatrix): 
 913                  self.chunks.extend( 
 914                          (i+subi, j+subj, subchunk) 
 915                          for subi, subj, subchunk in chunk.chunks) 
 916              else: 
 917                  self.chunks.append((i, j, chunk)) 
 918   
 919      @property 
 920 -    def T(self): 
 921          return BlockMatrix( 
 922                  (j, i, chunk.T) for i, j, chunk in self.chunks) 
 923   
 924      @property 
 925 -    def H(self): 
 926          return BlockMatrix( 
 927                  (j, i, chunk.H) for i, j, chunk in self.chunks) 
 928   
 929      @property 
 930 -    def shape(self): 
 931          return ( 
 932                  max(i+chunk.shape[0] for i, j, chunk in self.chunks), 
 933                  max(j+chunk.shape[0] for i, j, chunk in self.chunks) 
 934                  ) 
 935   
 936 -    def __add__(self, other): 
 937          if isinstance(other, BlockMatrix): 
 938              return BlockMatrix(self.chunks + other.chunks) 
 939          else: 
 940              return NotImplemented 
 941   
 942 -    def __neg__(self): 
 943          return BlockMatrix((i,j,-chunk) for i, j, chunk in self.chunks) 
 944   
 945 -    def __sub__(self, other): 
 946          if isinstance(other, BlockMatrix): 
 947              return BlockMatrix(self.chunks + (-other).chunks) 
 948          else: 
 949              return NotImplemented 
 950   
 951 -    def __mul__(self, other): 
 952          if num.Vector.is_a(other): 
 953              h, w = self.shape 
 954              assert len(other) == w 
 955              result = num.zeros((h,)) 
 956   
 957              for i, j, chunk in self.chunks: 
 958                  ch, cw = chunk.shape 
 959                  result[i:i+ch] += chunk * other[j:j+cw] 
 960   
 961              return result 
 962          elif isinstance(other, (float, complex, int)): 
 963              return BlockMatrix( 
 964                      (i,j,other*chunk) 
 965                      for i, j, chunk in self.chunks) 
 966          else: 
 967              return NotImplemented 
 968   
 969 -    def __rmul__(self, other): 
 970          if isinstance(other, (float, complex, int)): 
 971              return BlockMatrix( 
 972                      (i,j,other*chunk) 
 973                      for i, j, chunk in self.chunks) 
 974          else: 
 975              return NotImplemented 
 976   
 977 -    def add_to_build_matrix(self, bmat): 
 978          for i, j, chunk in self.chunks: 
 979              bmat.add_block(i, j, chunk) 
 980   
 981   
 982   
 983   
 984 -class IndexListRegistry(object): 
 985 -    def __init__(self, debug=False): 
 986          self.index_lists = [] 
 987          self.il_id_to_number = {} 
 988          self.il_to_number = {} 
 989          self.debug = debug 
 990   
 991 -    def register(self, identifier, generator): 
 992          try: 
 993              result = self.il_id_to_number[identifier] 
 994              if self.debug: 
 995                  assert generator() == self.index_lists[result], ( 
 996                          "identifier %s used for two different index lists" 
 997                          % str(identifier)) 
 998              return result 
 999          except KeyError: 
1000              il = generator() 
1001              try: 
1002                  nbr = self.il_to_number[il] 
1003              except KeyError: 
1004                  nbr = len(self.index_lists) 
1005                  self.index_lists.append(il) 
1006                  self.il_id_to_number[identifier] = nbr 
1007                  self.il_to_number[il] = nbr 
1008              else: 
1009                  self.il_id_to_number[identifier] = nbr 
1010              return nbr 
1011   
1012 -    def get_list_length(self): 
1013          from pytools import single_valued 
1014          return single_valued(len(il) for il in self.index_lists) 
1015   
1016   
1017   
1018   
1019   
1020  # diagnostics ----------------------------------------------------------------- 
1021 -def time_count_flop(func, timer, counter, flop_counter, flops, increment=1): 
1022      def wrapped_f(*args, **kwargs): 
1023          counter.add() 
1024          flop_counter.add(flops) 
1025          sub_timer = timer.start_sub_timer() 
1026          try: 
1027              return func(*args, **kwargs) 
1028          finally: 
1029              sub_timer.stop().submit() 
1030   
1031      return wrapped_f 
1032   
1033   
1034   
1035   
1036  # flop counting --------------------------------------------------------------- 
1037 -def diff_rst_flops(discr): 
1038      result = 0 
1039      for eg in discr.element_groups: 
1040          ldis = eg.local_discretization 
1041          result += ( 
1042                  2 # mul+add 
1043                  * ldis.node_count() * len(eg.members) 
1044                  * ldis.node_count() 
1045                  ) 
1046   
1047      return result 
1048   
1049   
1050   
1051   
1052 -def diff_rescale_one_flops(discr): 
1053      result = 0 
1054      for eg in discr.element_groups: 
1055          ldis = eg.local_discretization 
1056          result += ( 
1057                  # x,y,z rescale 
1058                  2 # mul+add 
1059                  * discr.dimensions 
1060                  * len(eg.members) * ldis.node_count() 
1061                  ) 
1062   
1063      return result 
1064   
1065   
1066   
1067   
1068 -def mass_flops(discr): 
1069      result = 0 
1070      for eg in discr.element_groups: 
1071          ldis = eg.local_discretization 
1072          result += ( 
1073                  2 # mul+add 
1074                  * ldis.node_count() * len(eg.members) 
1075                  * ldis.node_count() 
1076                  ) 
1077   
1078      result += len(discr.nodes) # jacobian rescale 
1079   
1080      return result 
1081   
1082   
1083   
1084   
1085 -def lift_flops(fg): 
1086      ldis = fg.ldis_loc 
1087      return ( 
1088              2 # mul+add 
1089              * ldis.face_node_count() 
1090              * ldis.face_count() 
1091              * ldis.node_count() 
1092              * fg.element_count() 
1093              ) 
1094   
1095   
1096   
1097   
1098 -def gather_flops(discr): 
1099      result = 0 
1100      for eg in discr.element_groups: 
1101          ldis = eg.local_discretization 
1102          result += ( 
1103                  ldis.face_node_count() 
1104                  * ldis.face_count() 
1105                  * len(eg.members) 
1106                  * (1 # facejac-mul 
1107                      + 2 * # int+ext 
1108                      3 # const-mul, normal-mul, add 
1109                      ) 
1110                  ) 
1111   
1112      return result 
1113   
1114   
1115   
1116   
1117 -def count_dofs(vec): 
1118      if isinstance(vec, numpy.ndarray): 
1119          if vec.dtype == object: 
1120              from pytools import indices_in_shape 
1121              return sum(count_dofs(vec[i]) 
1122                      for i in indices_in_shape(vec.shape)) 
1123          else: 
1124              return vec.size 
1125      else: 
1126          return 0 
1127   
1128   
1129   
1130   
1131  # flux creation --------------------------------------------------------------- 
1132 -def make_lax_friedrichs_flux(wave_speed, state, flux_func, bdry_tags_and_states, strong): 
1133      from hedge.flux import make_normal, FluxVectorPlaceholder 
1134   
1135      fluxes = flux_func(state) 
1136   
1137      n = len(state) 
1138      d = len(fluxes) 
1139      normal = make_normal(d) 
1140      fvph = FluxVectorPlaceholder(len(state)*(1+d)+1) 
1141   
1142      wave_speed_ph = fvph[0] 
1143      state_ph = fvph[1:1+n] 
1144      fluxes_ph = [fvph[1+i*n:1+(i+1)*n] for i in range(1, d+1)] 
1145   
1146      penalty = wave_speed_ph.int*(state_ph.ext-state_ph.int) 
1147   
1148      if not strong: 
1149          flux = 0.5*(sum(n_i*(f_i.int+f_i.ext) for n_i, f_i in zip(normal, fluxes_ph)) 
1150                  - penalty) 
1151      else: 
1152          flux = 0.5*(sum(n_i*(f_i.int-f_i.ext) for n_i, f_i in zip(normal, fluxes_ph)) 
1153                  + penalty) 
1154   
1155      from hedge.optemplate import get_flux_operator 
1156      flux_op = get_flux_operator(flux) 
1157      int_operand = join_fields(wave_speed, state, *fluxes) 
1158   
1159      from hedge.optemplate import pair_with_boundary 
1160      return (flux_op*int_operand 
1161              + sum( 
1162                  flux_op*pair_with_boundary(int_operand, 
1163                      join_fields(0, ext_state, *flux_func(ext_state)), tag) 
1164                  for tag, ext_state in bdry_tags_and_states)) 
1165   
1166   
1167   
1168   
1169  # debug tools ----------------------------------------------------------------- 
1170 -def wait_for_keypress(discr): 
1171      """MPI-aware keypress wait""" 
1172      try: 
1173          comm = discr.parallel_discr.context.communicator 
1174      except AttributeError: 
1175          raw_input("[Enter]") 
1176      else: 
1177          if comm.rank == 0: 
1178              # OpenMPI connects mpirun's stdin to rank 0's stdin. 
1179              print "[Enter]" 
1180              raw_input() 
1181   
1182          from boostmpi import broadcast 
1183          broadcast(comm, value=0, root=0) 
1184   
1185 -def get_rank(discr): 
1186      """Rank query that works with and without MPI active.""" 
1187      try: 
1188          comm = discr.parallel_discr.context.communicator 
1189      except AttributeError: 
1190          return 0 
1191      else: 
1192          return comm.rank 
1193   
1194   
1195   
1196   
1197 -def typedump(value, max_seq=5, special_handlers={}): 
1198      from pytools import typedump 
1199      special_handlers = special_handlers.copy() 
1200      special_handlers.update({ 
1201          numpy.ndarray: lambda x: "array(%s, %s)" % (len(x.shape), x.dtype) 
1202          }) 
1203      return typedump(value, max_seq, special_handlers) 
1204   
1205   
1206   
1207   
1208  # futures --------------------------------------------------------------------- 
1209 -class Future(object): 
1210      """An abstract interface definition for futures. 
1211   
1212      See http://en.wikipedia.org/wiki/Future_(programming) 
1213      """ 
1214 -    def is_ready(self): 
1215          raise NotImplementedError(self.__class__) 
1216   
1217 -    def __call__(self): 
1218          raise NotImplementedError(self.__class__) 
1219   
1220   
1221   
1222   
1223 -class ImmediateFuture(Future): 
1224      """A non-future that immediately has a value available.""" 
1225 -    def __init__(self, value): 
1226          self.value = value 
1227   
1228 -    def is_ready(self): 
1229          return True 
1230   
1231 -    def __call__(self): 
1232          return self.value 
1233   
1234   
1235   
1236   
1237 -class NestedFuture(Future): 
1238      """A future that combines two sub-futures into one.""" 
1239 -    def __init__(self, outer_future_factory, inner_future): 
1240          self.outer_future_factory = outer_future_factory 
1241          self.inner_future = inner_future 
1242          self.outer_future = None 
1243   
1244 -    def is_ready(self): 
1245          if self.inner_future.is_ready(): 
1246              self.outer_future = self.outer_future_factory(self.inner_future()) 
1247              self.is_ready = self.outer_future.is_ready() 
1248              return self.is_ready() 
1249          else: 
1250              return False 
1251   
1252 -    def __call__(self): 
1253          if self.outer_future is None: 
1254              return self.outer_future_factory(self.inner_future())() 
1255          else: 
1256              return self.outer_future() 
1257