Package hedge :: Module tools
[hide private]
[frames] | no frames]

Source Code for Module hedge.tools

   1  """Miscellaneous helper facilities.""" 
   2   
   3  from __future__ import division 
   4   
   5  __copyright__ = "Copyright (C) 2007 Andreas Kloeckner" 
   6   
   7  __license__ = """ 
   8  This program is free software: you can redistribute it and/or modify 
   9  it under the terms of the GNU General Public License as published by 
  10  the Free Software Foundation, either version 3 of the License, or 
  11  (at your option) any later version. 
  12   
  13  This program is distributed in the hope that it will be useful, 
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  GNU General Public License for more details. 
  17   
  18  You should have received a copy of the GNU General Public License 
  19  along with this program.  If not, see U{http://www.gnu.org/licenses/}. 
  20  """ 
  21   
  22   
  23   
  24   
  25   
  26  import numpy 
  27  import numpy.linalg as la 
  28  import pyublas 
  29  import hedge._internal 
  30   
  31   
  32   
  33  ZeroVector = hedge._internal.ZeroVector 
  34  cyl_bessel_j = hedge._internal.cyl_bessel_j 
  35  cyl_neumann = hedge._internal.cyl_neumann 
36 37 38 39 40 -def is_zero(x):
41 return isinstance(x, (int, float)) and x == 0
42
43 44 45 46 -def relative_error(norm_diff, norm_true):
47 if norm_true == 0: 48 if norm_diff == 0: 49 return 0 50 else: 51 return float("inf") 52 else: 53 return norm_diff/norm_true
54
55 56 57 58 -def has_data_in_numpy_arrays(a, allow_objarray_levels):
59 if is_obj_array(a) and allow_objarray_levels > 0: 60 from pytools import indices_in_shape, all 61 return all( 62 has_data_in_numpy_arrays( 63 a[i], allow_objarray_levels=allow_objarray_levels-1) 64 for i in indices_in_shape(a.shape)) 65 else: 66 return isinstance(a, numpy.ndarray) and a.dtype != object
67
68 69 70 71 -def numpy_linear_comb(lin_comb):
72 assert lin_comb 73 74 scalar_dtypes = tuple(numpy.array(fac).dtype for fac, ary in lin_comb) 75 76 from pytools import single_valued, indices_in_shape, flatten, \ 77 match_precision 78 from codepy.elementwise import make_linear_comb_kernel 79 80 if single_valued(is_obj_array(ary) for fac, ary in lin_comb): 81 oa_shape = single_valued(ary.shape for fac, ary in lin_comb) 82 result = numpy.zeros(oa_shape, dtype=object) 83 for i in indices_in_shape(oa_shape): 84 el_shape = single_valued(ary[i].shape for fac, ary in lin_comb) 85 86 vector_dtypes = tuple(ary[i].dtype for fac, ary in lin_comb) 87 scalar_dtypes = tuple( 88 match_precision(sd, vd) 89 for sd, vd in zip(scalar_dtypes, vector_dtypes)) 90 91 kernel, result_dtype = make_linear_comb_kernel( 92 scalar_dtypes, vector_dtypes) 93 94 result[i] = numpy.zeros(el_shape, result_dtype) 95 kernel(result[i], *tuple(flatten((fac, ary[i]) for fac, ary in lin_comb))) 96 97 return result 98 else: 99 shape = single_valued(ary.shape for fac, ary in lin_comb) 100 vector_dtypes = tuple(ary.dtype for fac, ary in lin_comb) 101 scalar_dtypes = tuple( 102 match_precision(sd, vd) 103 for sd, vd in zip(scalar_dtypes, vector_dtypes)) 104 105 kernel, result_dtype = make_linear_comb_kernel( 106 scalar_dtypes, vector_dtypes) 107 108 result = numpy.zeros(shape, result_dtype) 109 kernel(result, *tuple(flatten(lin_comb))) 110 return result
111
112 113 114 115 -def mul_add(afac, a, bfac, b, add_timer=None):
116 if is_obj_array(a): 117 return numpy.array([ 118 mul_add(afac, a_i, bfac, b_i, add_timer=add_timer) 119 for a_i, b_i in zip(a, b)], 120 dtype=object) 121 else: 122 return a.mul_add(afac, b, bfac, add_timer=add_timer)
123
124 125 126 127 -def cyl_bessel_j_prime(nu, z):
128 if nu == 0: 129 if z == 0: 130 return 0 131 else: 132 return -cyl_bessel_j(nu+1, z)+nu/z*cyl_bessel_j(nu, z) 133 else: 134 return 0.5*(cyl_bessel_j(nu-1, z)-cyl_bessel_j(nu+1, z))
135 136 137 138 139 AffineMap = hedge._internal.AffineMap
140 -def _affine_map___getinitargs__(self):
141 return self.matrix, self.vector
142 143 AffineMap.__getinitargs__ = _affine_map___getinitargs__
144 145 146 147 148 -class Rotation(AffineMap):
149 - def __init__(self, angle):
150 # FIXME: Add axis, make multidimensional 151 from math import sin, cos 152 AffineMap.__init__(self, 153 numpy.array([ 154 [cos(angle), sin(angle)], 155 [-sin(angle), cos(angle)]]), 156 numpy.zeros((2,)))
157
158 159 160 161 -class Reflection(AffineMap):
162 - def __init__(self, axis, dim):
163 mat = numpy.identity(dim) 164 mat[axis,axis] = -1 165 AffineMap.__init__(self, mat, numpy.zeros((dim,)))
166
167 168 169 170 -def plot_1d(f, a, b, steps=100, driver=None):
171 h = (b - a)/steps 172 173 points = [] 174 data = [] 175 for n in range(steps): 176 x = a + h * n 177 points.append(x) 178 data.append(f(x)) 179 180 # autodetect driver 181 if driver is None: 182 try: 183 import pylab 184 driver = "matplotlib" 185 except ImportError: 186 pass 187 if driver is None: 188 try: 189 import Gnuplot 190 driver = "gnuplot" 191 except ImportError: 192 pass 193 194 # actually plot 195 if driver == "matplotlib": 196 from pylab import plot, show 197 plot(points, data) 198 show() 199 elif driver == "gnuplot": 200 from Gnuplot import Gnuplot, Data 201 gp = Gnuplot() 202 gp.plot(Data(points, data)) 203 raw_input() 204 else: 205 raise ValueError, "invalid plot driver '%s'" % driver
206
207 208 209 210 # obj array helpers ----------------------------------------------------------- 211 -def is_obj_array(val):
212 try: 213 return val.dtype == object 214 except AttributeError: 215 return False
216
217 218 219 220 -def to_obj_array(ary):
221 ls = log_shape(ary) 222 result = numpy.empty(ls, dtype=object) 223 224 from pytools import indices_in_shape 225 for i in indices_in_shape(ls): 226 result[i] = ary[i] 227 228 return result
229
230 231 232 233 -def is_field_equal(a, b):
234 if is_obj_array(a): 235 return is_obj_array(b) and (a == b).all() 236 else: 237 return not is_obj_array(b) and a == b
238
239 240 241 242 -def make_obj_array(res_list):
243 result = numpy.empty((len(res_list),), dtype=object) 244 for i, v in enumerate(res_list): 245 result[i] = v 246 247 return result
248
249 250 251 252 -def setify_field(f):
253 from hedge.tools import is_obj_array 254 if is_obj_array(f): 255 return set(f) 256 else: 257 return set([f])
258
259 260 261 262 -def hashable_field(f):
263 if is_obj_array(f): 264 return tuple(f) 265 else: 266 return f
267
268 269 270 271 -def field_equal(a, b):
272 a_is_oa = is_obj_array(a) 273 assert a_is_oa == is_obj_array(b) 274 275 if a_is_oa: 276 return (a == b).all() 277 else: 278 return a == b
279
280 281 282 283 -def join_fields(*args):
284 res_list = [] 285 for arg in args: 286 if isinstance(arg, list): 287 res_list.extend(arg) 288 elif isinstance(arg, numpy.ndarray): 289 if log_shape(arg) == (): 290 res_list.append(arg) 291 else: 292 res_list.extend(arg) 293 else: 294 res_list.append(arg) 295 296 return make_obj_array(res_list)
297
298 299 300 301 -def log_shape(array):
302 """Returns the "logical shape" of the array. 303 304 The "logical shape" is the shape that's left when the node-depending 305 dimension has been eliminated.""" 306 307 try: 308 if array.dtype.char == "O": 309 return array.shape 310 else: 311 return array.shape[:-1] 312 except AttributeError: 313 return ()
314
315 316 317 318 -def with_object_array_or_scalar(f, field):
319 ls = log_shape(field) 320 if ls != (): 321 from pytools import indices_in_shape 322 result = numpy.zeros(ls, dtype=object) 323 for i in indices_in_shape(ls): 324 result[i] = f(field[i]) 325 return result 326 else: 327 return f(field)
328
329 330 331 332 -def ptwise_mul(a, b):
333 a_log_shape = log_shape(a) 334 b_log_shape = log_shape(b) 335 336 from pytools import indices_in_shape 337 338 if a_log_shape == (): 339 result = numpy.empty(b_log_shape, dtype=object) 340 for i in indices_in_shape(b_log_shape): 341 result[i] = a*b[i] 342 elif b_log_shape == (): 343 result = numpy.empty(a_log_shape, dtype=object) 344 for i in indices_in_shape(a_log_shape): 345 result[i] = a[i]*b 346 else: 347 raise ValueError, "ptwise_mul can't handle two non-scalars" 348 349 return result
350
351 352 353 354 -def ptwise_dot(logdims1, logdims2, a1, a2):
355 a1_log_shape = a1.shape[:logdims1] 356 a2_log_shape = a1.shape[:logdims2] 357 358 assert a1_log_shape[-1] == a2_log_shape[0] 359 len_k = a2_log_shape[0] 360 361 result = numpy.empty(a1_log_shape[:-1]+a2_log_shape[1:], dtype=object) 362 363 from pytools import indices_in_shape 364 for a1_i in indices_in_shape(a1_log_shape[:-1]): 365 for a2_i in indices_in_shape(a2_log_shape[1:]): 366 result[a1_i+a2_i] = sum( 367 a1[a1_i+(k,)] * a2[(k,)+a2_i] 368 for k in xrange(len_k) 369 ) 370 371 if result.shape == (): 372 return result[()] 373 else: 374 return result
375
376 377 378 379 -def levi_civita(tuple):
380 """Compute an entry of the Levi-Civita tensor for the indices C{tuple}. 381 382 Only three-tuples are supported for now. 383 """ 384 if len(tuple) == 3: 385 if tuple in [(0,1,2), (2,0,1), (1,2,0)]: 386 return 1 387 elif tuple in [(2,1,0), (0,2,1), (1,0,2)]: 388 return -1 389 else: 390 return 0 391 else: 392 raise NotImplementedError
393
394 395 396 397 -def count_subset(subset):
398 from pytools import len_iterable 399 return len_iterable(uc for uc in subset if uc)
400
401 402 403 404 -def full_to_subset_indices(subset, base=0):
405 """Takes a sequence of bools and turns it into an array of indices 406 to be used to extract the subset from the full set. 407 408 Example: 409 410 >>> full_to_subset_indices([False, True, True]) 411 array([1 2]) 412 """ 413 414 result = [] 415 for i, is_in in enumerate(subset): 416 if is_in: 417 result.append(i + base) 418 419 return numpy.array(result, dtype=numpy.intp)
420
421 422 423 -def full_to_all_subset_indices(subsets, base=0):
424 """Takes a sequence of bools and generates it into an array of indices 425 to be used to extract the subset from the full set. 426 427 Example: 428 429 >>> list(full_to_all_subset_indices([[False, True, True], [True,False,True]])) 430 [array([1 2]), array([3 5] 431 """ 432 433 for subset in subsets: 434 result = [] 435 for i, is_in in enumerate(subset): 436 if is_in: 437 result.append(i + base) 438 base += len(subset) 439 440 yield numpy.array(result, dtype=numpy.intp)
441
442 443 444 -def partial_to_all_subset_indices(subsets, base=0):
445 """Takes a sequence of bools and generates it into an array of indices 446 to be used to insert the subset into the full set. 447 448 Example: 449 450 >>> list(partial_to_all_subset_indices([[False, True, True], [True,False,True]])) 451 [array([0 1]), array([2 3] 452 """ 453 454 idx = base 455 for subset in subsets: 456 result = [] 457 for is_in in subset: 458 if is_in: 459 result.append(idx) 460 idx += 1 461 462 yield numpy.array(result, dtype=numpy.intp)
463
464 465 466 -class SubsettableCrossProduct:
467 """A cross product that can operate on an arbitrary subsets of its 468 two operands and return an arbitrary subset of its result. 469 """ 470 471 full_subset = (True, True, True) 472
473 - def __init__(self, op1_subset=full_subset, op2_subset=full_subset, result_subset=full_subset):
474 """Construct a subset-able cross product. 475 476 @arg op1_subset: The subset of indices of operand 1 to be taken into account. 477 Given as a 3-sequence of bools. 478 @arg op2_subset: The subset of indices of operand 2 to be taken into account. 479 Given as a 3-sequence of bools. 480 @arg result_subset: The subset of indices of the result that are calculated. 481 Given as a 3-sequence of bools. 482 """ 483 def subset_indices(subset): 484 return [i for i, use_component in enumerate(subset) 485 if use_component]
486 487 self.op1_subset = op1_subset 488 self.op2_subset = op2_subset 489 self.result_subset = result_subset 490 491 import pymbolic 492 op1 = pymbolic.var("x") 493 op2 = pymbolic.var("y") 494 495 self.functions = [] 496 self.component_lcjk = [] 497 for i, use_component in enumerate(result_subset): 498 if use_component: 499 this_expr = 0 500 this_component = [] 501 for j, j_real in enumerate(subset_indices(op1_subset)): 502 for k, k_real in enumerate(subset_indices(op2_subset)): 503 lc = levi_civita((i, j_real, k_real)) 504 if lc != 0: 505 this_expr += lc*op1[j]*op2[k] 506 this_component.append((lc, j, k)) 507 self.functions.append(pymbolic.compile(this_expr, 508 variables=[op1, op2])) 509 self.component_lcjk.append(this_component)
510
511 - def __call__(self, x, y, three_mult=None):
512 """Compute the subsetted cross product on the indexables C{x} and C{y}. 513 514 @arg three_mult: a function of three arguments C{sign, xj, yk} 515 used in place of the product C{sign*xj*yk}. Defaults to just this 516 product if not given. 517 """ 518 if three_mult is None: 519 return join_fields(*[f(x, y) for f in self.functions]) 520 else: 521 return join_fields( 522 *[sum(three_mult(lc, x[j], y[k]) for lc, j, k in lcjk) 523 for lcjk in self.component_lcjk])
524 525 526 527 528 cross = SubsettableCrossProduct()
529 530 531 532 533 -def normalize(v):
534 return v/numpy.linalg.norm(v)
535
536 537 538 539 -def sign(x):
540 if x > 0: 541 return 1 542 elif x == 0: 543 return 0 544 else: 545 return -1
546
547 548 549 550 -def find_matching_vertices_along_axis(axis, points_a, points_b, numbers_a, numbers_b):
551 a_to_b = {} 552 not_found = [] 553 554 for i, pi in enumerate(points_a): 555 found = False 556 for j, pj in enumerate(points_b): 557 dist = pi-pj 558 dist[axis] = 0 559 if la.norm(dist) < 1e-12: 560 a_to_b[numbers_a[i]] = numbers_b[j] 561 found = True 562 break 563 if not found: 564 not_found.append(numbers_a[i]) 565 566 return a_to_b, not_found
567
568 569 570 571 # linear algebra tools -------------------------------------------------------- 572 -def orthonormalize(vectors, discard_threshold=None):
573 """Carry out a modified [1] Gram-Schmidt orthonormalization on 574 vectors. 575 576 If, during orthonormalization, the 2-norm of a vector drops 577 below C{discard_threshold}, then this vector is silently 578 discarded. If C{discard_threshold} is C{None}, then no vector 579 will ever be dropped, and a zero 2-norm encountered during 580 orthonormalization will throw a C{RuntimeError}. 581 582 [1] U{http://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process} 583 """ 584 585 from numpy import dot 586 done_vectors = [] 587 588 for v in vectors: 589 my_v = v.copy() 590 for done_v in done_vectors: 591 my_v = my_v - dot(my_v, done_v.conjugate()) * done_v 592 v_norm = la.norm(my_v) 593 594 if discard_threshold is None: 595 if v_norm == 0: 596 raise RuntimeError, "Orthogonalization failed" 597 else: 598 if v_norm < discard_threshold: 599 continue 600 601 my_v /= v_norm 602 done_vectors.append(my_v) 603 604 return done_vectors
605
606 607 608 609 -def permutation_matrix(to_indices=None, from_indices=None, h=None, w=None, 610 dtype=None, flavor=None):
611 """Return a permutation matrix. 612 613 If to_indices is specified, the resulting permutation 614 matrix P satisfies the condition 615 616 P * e[i] = e[to_indices[i]] for i=1,...,len(to_indices) 617 618 where e[i] is the i-th unit vector. The height of P is 619 determined either implicitly by the maximum of to_indices 620 or explicitly by the parameter h. 621 622 If from_indices is specified, the resulting permutation 623 matrix P satisfies the condition 624 625 P * e[from_indices[i]] = e[i] for i=1,...,len(from_indices) 626 627 where e[i] is the i-th unit vector. The width of P is 628 determined either implicitly by the maximum of from_indices 629 of explicitly by the parameter w. 630 631 If both to_indices and from_indices is specified, a ValueError 632 exception is raised. 633 """ 634 if to_indices is not None and from_indices is not None: 635 raise ValueError, "only one of to_indices and from_indices may " \ 636 "be specified" 637 638 if to_indices is not None: 639 if h is None: 640 h = max(to_indices)+1 641 w = len(to_indices) 642 else: 643 if w is None: 644 w = max(from_indices)+1 645 h = len(from_indices) 646 647 if flavor is None: 648 result = numpy.zeros((h,w), dtype=dtype) 649 650 if to_indices is not None: 651 for j, i in enumerate(to_indices): 652 result[i,j] = 1 653 else: 654 for i, j in enumerate(from_indices): 655 result[i,j] = 1 656 else: 657 result = pyublas.zeros((h,w), dtype=dtype, flavor=flavor) 658 659 if to_indices is not None: 660 for j, i in enumerate(to_indices): 661 result.add_element(i, j, 1) 662 else: 663 for i, j in enumerate(from_indices): 664 result.add_element(i, j, 1) 665 666 return result
667
668 669 670 671 -def leftsolve(A, B):
672 return la.solve(A.T, B.T).T
673
674 675 676 677 -def unit_vector(n, i, dtype=None):
678 """Return the i-th unit vector of size n, with the given dtype.""" 679 result = numpy.zeros((n,), dtype=dtype) 680 result[i] = 1 681 return result
682
683 684 685 686 # eoc estimation -------------------------------------------------------------- 687 -def estimate_order_of_convergence(abscissae, errors):
688 """Assuming that abscissae and errors are connected by a law of the form 689 690 error = constant * abscissa ^ (-order), 691 692 this function finds, in a least-squares sense, the best approximation of 693 constant and order for the given data set. It returns a tuple (constant, order). 694 Both inputs must be PyLinear vectors. 695 """ 696 assert len(abscissae) == len(errors) 697 if len(abscissae) <= 1: 698 raise RuntimeError, "Need more than one value to guess order of convergence." 699 700 coefficients = numpy.polyfit(numpy.log10(abscissae), numpy.log10(errors), 1) 701 return 10**coefficients[-1], -coefficients[-2]
702
703 704 705 706 -class EOCRecorder(object):
707 - def __init__(self):
708 self.history = []
709
710 - def add_data_point(self, abscissa, error):
711 self.history.append((abscissa, error))
712
713 - def estimate_order_of_convergence(self, gliding_mean = None):
714 abscissae = numpy.array([ a for a,e in self.history ]) 715 errors = numpy.array([ e for a,e in self.history ]) 716 717 size = len(abscissae) 718 if gliding_mean is None: 719 gliding_mean = size 720 721 data_points = size - gliding_mean + 1 722 result = numpy.zeros((data_points, 2), float) 723 for i in range(data_points): 724 result[i,0], result[i,1] = estimate_order_of_convergence( 725 abscissae[i:i+gliding_mean], errors[i:i+gliding_mean]) 726 return result
727
728 - def pretty_print(self, abscissa_label="N", error_label="Error", gliding_mean=2):
729 from pytools import Table 730 731 tbl = Table() 732 tbl.add_row((abscissa_label, error_label, "Running EOC")) 733 734 gm_eoc = self.estimate_order_of_convergence(gliding_mean) 735 for i, (absc, err) in enumerate(self.history): 736 if i < gliding_mean-1: 737 tbl.add_row((str(absc), str(err), "")) 738 else: 739 tbl.add_row((str(absc), str(err), str(gm_eoc[i-gliding_mean+1,1]))) 740 741 if len(self.history) > 1: 742 return str(tbl) + "\n\nOverall EOC: %s" % self.estimate_order_of_convergence()[0,1] 743 else: 744 return str(tbl)
745
746 - def write_gnuplot_file(self, filename):
747 outfile = file(filename, "w") 748 for absc, err in self.history: 749 outfile.write("%f %f\n" % (absc, err)) 750 result = self.estimate_order_of_convergence() 751 const = result[0,0] 752 order = result[0,1] 753 outfile.write("\n") 754 for absc, err in self.history: 755 outfile.write("%f %f\n" % (absc, const * absc**(-order)))
756
757 758 759 760 # small utilities ------------------------------------------------------------- 761 -class Closable(object):
762 - def __init__(self):
763 self.is_closed = False
764
765 - def __enter__(self):
766 return self
767
768 - def __exit__(self, exc_type, exc_value, traceback):
769 self.close()
770
771 - def close(self):
772 if not self.is_closed: 773 # even if close attempt fails, consider ourselves closed still 774 try: 775 self.do_close() 776 finally: 777 self.is_closed = True
778
779 780 781 782 -def mem_checkpoint(name=None):
783 """Invoke the garbage collector and wait for a keypress.""" 784 785 import gc 786 gc.collect() 787 if name: 788 raw_input("%s -- hit Enter:" % name) 789 else: 790 raw_input("Enter:")
791
792 793 794 795 796 # index map tools ------------------------------------------------------------- 797 -def apply_index_map(imap, vector):
798 from hedge._internal import VectorTarget, perform_index_map 799 800 ls = log_shape(vector) 801 if ls == (): 802 result = numpy.zeros(shape=(imap.to_length,), dtype=float) 803 perform_index_map(imap, VectorTarget(vector, result)) 804 else: 805 result = numpy.zeros(shape=ls+(imap.to_length,), dtype=float) 806 from pytools import indices_in_shape 807 for i in indices_in_shape(ls): 808 perform_index_map(imap, VectorTarget(vector[i], result[i])) 809 return result
810
811 812 813 814 -def apply_inverse_index_map(imap, vector):
815 from hedge._internal import VectorTarget, perform_inverse_index_map 816 817 ls = log_shape(vector) 818 if ls == (): 819 result = numpy.zeros(shape=(imap.from_length,), dtype=float) 820 perform_inverse_index_map(imap, VectorTarget(vector, result)) 821 else: 822 result = numpy.zeros(shape=ls+(imap.from_length,), dtype=float) 823 from pytools import indices_in_shape 824 for i in indices_in_shape(ls): 825 perform_inverse_index_map(imap, VectorTarget(vector[i], result[i])) 826 return result
827
828 829 830 831 # mesh reorderings ------------------------------------------------------------ 832 -def cuthill_mckee(graph):
833 """Return a Cuthill-McKee ordering for the given graph. 834 835 See (for example) 836 Y. Saad, Iterative Methods for Sparse Linear System, 837 2nd edition, p. 76. 838 839 `graph' is given as an adjacency mapping, i.e. each node is 840 mapped to a list of its neighbors. 841 """ 842 from pytools import argmin 843 844 # this list is called "old_numbers" because it maps a 845 # "new number to its "old number" 846 old_numbers = [] 847 visited_nodes = set() 848 levelset = [] 849 850 all_nodes = set(graph.keys()) 851 852 def levelset_cmp(node_a, node_b): 853 return cmp(len(graph[node_a]), len(graph[node_b]))
854 855 while len(old_numbers) < len(graph): 856 if not levelset: 857 unvisited = list(set(graph.keys()) - visited_nodes) 858 859 if not unvisited: 860 break 861 862 start_node = unvisited[ 863 argmin(len(graph[node]) for node in unvisited)] 864 visited_nodes.add(start_node) 865 old_numbers.append(start_node) 866 levelset = [start_node] 867 868 next_levelset = set() 869 levelset.sort(levelset_cmp) 870 871 for node in levelset: 872 for neighbor in graph[node]: 873 if neighbor in visited_nodes: 874 continue 875 876 visited_nodes.add(neighbor) 877 next_levelset.add(neighbor) 878 old_numbers.append(neighbor) 879 880 levelset = list(next_levelset) 881 882 return old_numbers 883
884 885 886 887 -def reverse_lookup_table(lut):
888 result = [None] * len(lut) 889 for key, value in enumerate(lut): 890 result[value] = key 891 return result
892
893 894 895 896 # block matrix ---------------------------------------------------------------- 897 -class BlockMatrix(object):
898 """A block matrix is the sum of different smaller 899 matrices positioned within one big matrix. 900 """ 901
902 - def __init__(self, chunks):
903 """Return a new block matrix made up of components (`chunks') 904 given as triples (i,j,smaller_matrix), where the top left (0,0) 905 corner of the smaller_matrix is taken to be at position (i,j). 906 907 smaller_matrix may be anything that can be left-multiplied to 908 a Pylinear vector, including BlockMatrix instances. 909 """ 910 self.chunks = [] 911 for i, j, chunk in chunks: 912 if isinstance(chunk, BlockMatrix): 913 self.chunks.extend( 914 (i+subi, j+subj, subchunk) 915 for subi, subj, subchunk in chunk.chunks) 916 else: 917 self.chunks.append((i, j, chunk))
918 919 @property
920 - def T(self):
921 return BlockMatrix( 922 (j, i, chunk.T) for i, j, chunk in self.chunks)
923 924 @property
925 - def H(self):
926 return BlockMatrix( 927 (j, i, chunk.H) for i, j, chunk in self.chunks)
928 929 @property
930 - def shape(self):
931 return ( 932 max(i+chunk.shape[0] for i, j, chunk in self.chunks), 933 max(j+chunk.shape[0] for i, j, chunk in self.chunks) 934 )
935
936 - def __add__(self, other):
937 if isinstance(other, BlockMatrix): 938 return BlockMatrix(self.chunks + other.chunks) 939 else: 940 return NotImplemented
941
942 - def __neg__(self):
943 return BlockMatrix((i,j,-chunk) for i, j, chunk in self.chunks)
944
945 - def __sub__(self, other):
946 if isinstance(other, BlockMatrix): 947 return BlockMatrix(self.chunks + (-other).chunks) 948 else: 949 return NotImplemented
950
951 - def __mul__(self, other):
952 if num.Vector.is_a(other): 953 h, w = self.shape 954 assert len(other) == w 955 result = num.zeros((h,)) 956 957 for i, j, chunk in self.chunks: 958 ch, cw = chunk.shape 959 result[i:i+ch] += chunk * other[j:j+cw] 960 961 return result 962 elif isinstance(other, (float, complex, int)): 963 return BlockMatrix( 964 (i,j,other*chunk) 965 for i, j, chunk in self.chunks) 966 else: 967 return NotImplemented
968
969 - def __rmul__(self, other):
970 if isinstance(other, (float, complex, int)): 971 return BlockMatrix( 972 (i,j,other*chunk) 973 for i, j, chunk in self.chunks) 974 else: 975 return NotImplemented
976
977 - def add_to_build_matrix(self, bmat):
978 for i, j, chunk in self.chunks: 979 bmat.add_block(i, j, chunk)
980
981 982 983 984 -class IndexListRegistry(object):
985 - def __init__(self, debug=False):
986 self.index_lists = [] 987 self.il_id_to_number = {} 988 self.il_to_number = {} 989 self.debug = debug
990
991 - def register(self, identifier, generator):
992 try: 993 result = self.il_id_to_number[identifier] 994 if self.debug: 995 assert generator() == self.index_lists[result], ( 996 "identifier %s used for two different index lists" 997 % str(identifier)) 998 return result 999 except KeyError: 1000 il = generator() 1001 try: 1002 nbr = self.il_to_number[il] 1003 except KeyError: 1004 nbr = len(self.index_lists) 1005 self.index_lists.append(il) 1006 self.il_id_to_number[identifier] = nbr 1007 self.il_to_number[il] = nbr 1008 else: 1009 self.il_id_to_number[identifier] = nbr 1010 return nbr
1011
1012 - def get_list_length(self):
1013 from pytools import single_valued 1014 return single_valued(len(il) for il in self.index_lists)
1015
1016 1017 1018 1019 1020 # diagnostics ----------------------------------------------------------------- 1021 -def time_count_flop(func, timer, counter, flop_counter, flops, increment=1):
1022 def wrapped_f(*args, **kwargs): 1023 counter.add() 1024 flop_counter.add(flops) 1025 sub_timer = timer.start_sub_timer() 1026 try: 1027 return func(*args, **kwargs) 1028 finally: 1029 sub_timer.stop().submit()
1030 1031 return wrapped_f 1032
1033 1034 1035 1036 # flop counting --------------------------------------------------------------- 1037 -def diff_rst_flops(discr):
1038 result = 0 1039 for eg in discr.element_groups: 1040 ldis = eg.local_discretization 1041 result += ( 1042 2 # mul+add 1043 * ldis.node_count() * len(eg.members) 1044 * ldis.node_count() 1045 ) 1046 1047 return result
1048
1049 1050 1051 1052 -def diff_rescale_one_flops(discr):
1053 result = 0 1054 for eg in discr.element_groups: 1055 ldis = eg.local_discretization 1056 result += ( 1057 # x,y,z rescale 1058 2 # mul+add 1059 * discr.dimensions 1060 * len(eg.members) * ldis.node_count() 1061 ) 1062 1063 return result
1064
1065 1066 1067 1068 -def mass_flops(discr):
1069 result = 0 1070 for eg in discr.element_groups: 1071 ldis = eg.local_discretization 1072 result += ( 1073 2 # mul+add 1074 * ldis.node_count() * len(eg.members) 1075 * ldis.node_count() 1076 ) 1077 1078 result += len(discr.nodes) # jacobian rescale 1079 1080 return result
1081
1082 1083 1084 1085 -def lift_flops(fg):
1086 ldis = fg.ldis_loc 1087 return ( 1088 2 # mul+add 1089 * ldis.face_node_count() 1090 * ldis.face_count() 1091 * ldis.node_count() 1092 * fg.element_count() 1093 )
1094
1095 1096 1097 1098 -def gather_flops(discr):
1099 result = 0 1100 for eg in discr.element_groups: 1101 ldis = eg.local_discretization 1102 result += ( 1103 ldis.face_node_count() 1104 * ldis.face_count() 1105 * len(eg.members) 1106 * (1 # facejac-mul 1107 + 2 * # int+ext 1108 3 # const-mul, normal-mul, add 1109 ) 1110 ) 1111 1112 return result
1113
1114 1115 1116 1117 -def count_dofs(vec):
1118 if isinstance(vec, numpy.ndarray): 1119 if vec.dtype == object: 1120 from pytools import indices_in_shape 1121 return sum(count_dofs(vec[i]) 1122 for i in indices_in_shape(vec.shape)) 1123 else: 1124 return vec.size 1125 else: 1126 return 0
1127
1128 1129 1130 1131 # flux creation --------------------------------------------------------------- 1132 -def make_lax_friedrichs_flux(wave_speed, state, flux_func, bdry_tags_and_states, strong):
1133 from hedge.flux import make_normal, FluxVectorPlaceholder 1134 1135 fluxes = flux_func(state) 1136 1137 n = len(state) 1138 d = len(fluxes) 1139 normal = make_normal(d) 1140 fvph = FluxVectorPlaceholder(len(state)*(1+d)+1) 1141 1142 wave_speed_ph = fvph[0] 1143 state_ph = fvph[1:1+n] 1144 fluxes_ph = [fvph[1+i*n:1+(i+1)*n] for i in range(1, d+1)] 1145 1146 penalty = wave_speed_ph.int*(state_ph.ext-state_ph.int) 1147 1148 if not strong: 1149 flux = 0.5*(sum(n_i*(f_i.int+f_i.ext) for n_i, f_i in zip(normal, fluxes_ph)) 1150 - penalty) 1151 else: 1152 flux = 0.5*(sum(n_i*(f_i.int-f_i.ext) for n_i, f_i in zip(normal, fluxes_ph)) 1153 + penalty) 1154 1155 from hedge.optemplate import get_flux_operator 1156 flux_op = get_flux_operator(flux) 1157 int_operand = join_fields(wave_speed, state, *fluxes) 1158 1159 from hedge.optemplate import pair_with_boundary 1160 return (flux_op*int_operand 1161 + sum( 1162 flux_op*pair_with_boundary(int_operand, 1163 join_fields(0, ext_state, *flux_func(ext_state)), tag) 1164 for tag, ext_state in bdry_tags_and_states))
1165
1166 1167 1168 1169 # debug tools ----------------------------------------------------------------- 1170 -def wait_for_keypress(discr):
1171 """MPI-aware keypress wait""" 1172 try: 1173 comm = discr.parallel_discr.context.communicator 1174 except AttributeError: 1175 raw_input("[Enter]") 1176 else: 1177 if comm.rank == 0: 1178 # OpenMPI connects mpirun's stdin to rank 0's stdin. 1179 print "[Enter]" 1180 raw_input() 1181 1182 from boostmpi import broadcast 1183 broadcast(comm, value=0, root=0)
1184
1185 -def get_rank(discr):
1186 """Rank query that works with and without MPI active.""" 1187 try: 1188 comm = discr.parallel_discr.context.communicator 1189 except AttributeError: 1190 return 0 1191 else: 1192 return comm.rank
1193
1194 1195 1196 1197 -def typedump(value, max_seq=5, special_handlers={}):
1198 from pytools import typedump 1199 special_handlers = special_handlers.copy() 1200 special_handlers.update({ 1201 numpy.ndarray: lambda x: "array(%s, %s)" % (len(x.shape), x.dtype) 1202 }) 1203 return typedump(value, max_seq, special_handlers)
1204
1205 1206 1207 1208 # futures --------------------------------------------------------------------- 1209 -class Future(object):
1210 """An abstract interface definition for futures. 1211 1212 See http://en.wikipedia.org/wiki/Future_(programming) 1213 """
1214 - def is_ready(self):
1215 raise NotImplementedError(self.__class__)
1216
1217 - def __call__(self):
1218 raise NotImplementedError(self.__class__)
1219
1220 1221 1222 1223 -class ImmediateFuture(Future):
1224 """A non-future that immediately has a value available."""
1225 - def __init__(self, value):
1226 self.value = value
1227
1228 - def is_ready(self):
1229 return True
1230
1231 - def __call__(self):
1232 return self.value
1233
1234 1235 1236 1237 -class NestedFuture(Future):
1238 """A future that combines two sub-futures into one."""
1239 - def __init__(self, outer_future_factory, inner_future):
1240 self.outer_future_factory = outer_future_factory 1241 self.inner_future = inner_future 1242 self.outer_future = None
1243
1244 - def is_ready(self):
1245 if self.inner_future.is_ready(): 1246 self.outer_future = self.outer_future_factory(self.inner_future()) 1247 self.is_ready = self.outer_future.is_ready() 1248 return self.is_ready() 1249 else: 1250 return False
1251
1252 - def __call__(self):
1253 if self.outer_future is None: 1254 return self.outer_future_factory(self.inner_future())() 1255 else: 1256 return self.outer_future()
1257