"""Lazy series abstraction tightly integrated with iterators. This module exposes the duality between linked lists and iterators: - Simple linked lists have the drawback that their tail must be known and finite. This can be fixed by overloading head/tail access and making it lazy; the resulting facility is popular in functional programming languages. This is implemented by the `Series` classes. - Iterators have the drawback of being stateful (advancing them is destructive) but there is no way to capture the "remaining future" of an iterator at some moment. This can be fixed by making an iterator explicitly point to a series and allow separate iterators to be forked from it. This is implemented by the `SeriesIter` class. Another useful thing about linked lists is that you can non-destructively cons items in front of them. This is implemented by the `Series` constructor (which is also used to represent the part of series that has already been computed) and is also accessible through the `SeriesIter.unyield()` method. A ``__series__()`` protocol is defined, paralleling ``__iter__()``. Object can implement it to provide special implementaion of series over them (but normally you would simply implement ``__iter__()`` and `Series` will know to wrap it). On series objects this method returns self. A convenince subscripting interface is provided for series. :See also: - The `Streams chapter of SICP`__ shows how streams with a lazy tail can be a very powerful abstraction. __ http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-24.html#%_sec_3.5 - `SRFI 40: A Library of Streams`__ specifies streams whose head is lazy too, which is more elegant and predictable. It's also a better parallel for iterators. This is the flavour implemented by this module. __ http://srfi.schemers.org/srfi-40/srfi-40.html - `Appendix A of Common Lisp the Language: Series`__ specifies a very extensive set of operations on lazy sequences. It does *not* implement them as linked lists and does not make it easy to take references into the middle of a series; instead it statically optimizes code involving series into very effecient loops. `Appendix B: Generators and Gatherers`__, describes a facility very similar to Python's iterators and their inverse (send multiple values to object, get summary value in the end). It implements them as a thin layer upon the Series facility, which is similar to what this module does with `SeriesIter`. __ http://www.supelec.fr/docs/cltl/clm/node347.html __ http://www.supelec.fr/docs/cltl/clm/node362.html The name "streams" seems to be more widely known among functional programmers and the "series" facility is somewhat different from what we build here. Nevertheless, I chose "series" for this module since it has no other accepted meanings while "streams" is overloaded with associations, especially I/O... """ __author__ = "Beni Cherniavsky " __license__ = "LGPL " __version__ = "0.2+" __url__ = 'http://cben-hacks.sf.net/python/' # The order of definitions here is a bit bogus, because of the complex # dependencies between the [meta]classes... class _SeriesMetaclass(type): """Automatically create an corresponding lazy series class. Series objects can be in two states: computed (a cons cell) or not (a promise for a cons cell). For effeciency and elegancy(?), the state is tracked simply by changing the __class__ of the object. Simply transmuting to `Series` would be unfreidnly to subclasses of `Series` - thier identity would be lost. Instead, lazy series are implemented in a `_LazyMixin` which is automagically combined with every subclass of `Series` to create a corresponding lazy class and connect the pair of classes to each other. The additional methods of the subclass will also work in the lazy class.""" def __init__(cls, name, bases, attrs): super(_SeriesMetaclass, cls).__init__(name, bases, attrs) # Leave alone lazy series classes, to prevent loops. if _LazyMixin in bases: return class LazySeries(_LazyMixin, cls): """A series that has not been computed yet. `head` is computed lazily, raising `StopIteration` for an empty series. `tail` is another lazy series, accessing it implies computing `head` (but does not compute the tail's head).""" # Transmute to this when computing. _SeriesClass = cls __slots__ = [] # Optimization hack: We don't want to waste space in the # `LazySeries` object for the iterator (which will not be needed # as soon as it becomes a `Series`). So we stuff it into the # `tail` slot. Our tail property masks it anyway, so we expose # the `tail` slot as `_iterator`. Long live hardlinks! _iterator = cls.tail LazySeries.__name__ = 'Lazy' + name # ??? cls._LazySeriesClass = LazySeries class _LazyMixin(object): """This almost implements an lazy `Series` subclass. `_SeriesMetaclass` combines this properly with series classes (including be `Series` itself) to create lazy classes.""" __slots__ = [] def __new__(cls, iterable): """Wrap a given `iterable` if it's not a steram already.""" try: # Must not have multiple `LazySeries`s for same iterator. # @@@ BUG: can't re-wrap with a subclass. return iterable.__series__() except AttributeError: s = object.__new__(cls) s._iterator = iter(iterable) return s # A fully lazy series wrapping an iterator can pass through 3 phases: # # 1. iterator has yet to compute `head`. # 2. `head` computed, iterator has yet to compute `tail`. # 3. `head` and `tail` known. # # Allowing phase 2 gives at most O(1) savings at the end of the series but # incurs O(n) overhead during its construction and/or traversal. # Therefore we directly jump from phase 1 to 3. This is not user-visible # in any case. # # Phase 3 is realized by transmuting the object from a `LazySeries` to a # `Series`. def head(self): iterator = self._iterator head = iterator.next() # Transmutation hack. Must be done *after* we know `StopIteration` # wasn't raised. @@@ Unfriendly to subclasses! self.__class__ = self._SeriesClass self.head = head self.tail = self._LazySeriesClass(iterator) return self.head head = property(head) def tail(self): self.head # force `head` to be known. return self.tail tail = property(tail) lazy = True def __nonzero__(self): """Forces computation of head to check if there is any. >>> bool(Series(iter(lambda:1, None))) True >>> bool(Series([])) False """ try: self.head return True except StopIteration: return False def __repr__(self): return "<%s(%r) at 0x%x>" % (self._SeriesClass.__name__, self._iterator, id(self)) class Series(object): """An immutable possibly lazy linked list, with `head`, `tail` attributes. The constructor of this class creates either a `Series` or an `LazySeries` depending on whether you give it one or two arguments. Since `LazySeries` objects turn into `Series` objects with time, you are not supposed to worry about the distinction - both are just a `Series`. Note that a `Series` can still be infinite because somewhere down the road it can have an lazy tail. >>> s = Series(iter(range(4))) >>> s.head 0 >>> s.tail.head 1 >>> s.tail.tail.tail.tail.head Traceback (most recent call last): ... StopIteration >>> s.tail.tail.head # Can go back as much we want. 2 >>> list(s) [0, 1, 2, 3] >>> a = range(3) >>> b = Series(-1, a) >>> c = Series(-10, a) >>> list(b) [-1, 0, 1, 2] >>> list(c) [-10, 0, 1, 2] """ __metaclass__ = _SeriesMetaclass __slots__ = ['head', 'tail'] def __new__(cls, *args): """Series([head,] iterable) -> Series of [`head` preceding] `iterable`.""" try: (head, iterable) = args self = object.__new__(cls) self.head = head self.tail = cls._LazySeriesClass(iterable) return self except ValueError: (iterable,) = args return cls._LazySeriesClass(iterable) def __iter__(self): return SeriesIter(self) def __series__(self): """This method is attempted before wrapping a `Series`.""" return self def __getitem__(self, key): """A convenince subscripting interface. This replaces an infinite set of conveninece interfaces to get the head and tail in a tuple, etc. A number key gives a specific value or raises `IndexError`. A finite slice gives a list of values, possibly empty. An infinite slice (``[start:]``) gives a tail of the series, possibly empty. Steps are supported. Negative indexes/steps are not supported. A tuple of keys gives a tuple of the values you would get for each item. >>> s = Series(range(4)) >>> s[2] 2 >>> s[1:3] == range(1, 3) True >>> list(s[2:]) == range(2, 4) True >>> s[1, 2:] == (s[1], s[2:]) True >>> s[2:, 1, :] == (s[2:], s[1], s) True """ # This method was rewritten by me many times. I tried to optimize # indexing by tuple so that the list wouldn't have to be traversed # many times by caching distant tails between the tuple's items; the # complexity of this was so high that it's probably not worth it. # Typical use cases for tuple indexing will use small indexes anyway, # e.g. ``s[0, 1:]``. if isinstance(key, tuple): return tuple(map(self.__getitem__, key)) if isinstance(key, int): try: return self[key:].head except StopIteration: raise IndexError try: start = key.start or 0 stop = key.stop step = key.step or 1 except AttributeError: raise TypeError("Key must be an int, slice or tuple.") if start < 0 or (stop and stop < 0) or step <= 0: raise TypeError("Negative indexes/steps not supported.") if stop is None and step == 1: for _ in range(start): try: self = self.tail except StopIteration: break return self from itertools import islice res = islice(self, start, stop, step) if stop is not None: return list(res) return Series(res) lazy = False def force(self, count=None): """Force immediate computation of `count` values, all by default. This only has effect on lazy `LazySeries` objects and the effect is only on the moment of computation. If that can change the results, perhaps this module is the wrong tool for you. Calling with no `count` on infinite series will enter an infinite loop! >>> s = Series(range(4)) >>> s.lazy True >>> s.force(2) >>> s[1:].lazy # computed up to here False >>> s[2:].lazy # lazy boundary moved by 2 True >>> s.force() # stops on StopIteration >>> s[3:].lazy False """ try: if count is None: # @@@ awkward duplication while True: self = self.tail else: for _ in range(count): self = self.tail except StopIteration: pass def __nonzero__(self): """Checks whether the series is empty. Hint: at least `n` elements exist iff ``self[n-1:]`` is non-empty.""" return True def __repr__(self): return "<%s(%r, ...)>" % (self.__class__.__name__, self.head) class SeriesIter(object): """A copyable iterator over a series. The `series` attribute points to the current series; it advances every time you call `.next()`. Operations on one `SeriesIter` never affect any other `SeriesIter` in any way since the underlying series are immutable. >>> x = SeriesIter(range(5)) >>> x.next() 0 >>> y = x.copy() # take a snapshot >>> [y.next(), y.next()] [1, 2] >>> [x.next(), x.next(), x.next()] # x and y are independent [1, 2, 3] >>> list(y.copy()) # non-destructive [3, 4] >>> list(y) # destructive [3, 4] >>> y.next() Traceback (most recent call last): ... StopIteration >>> x.next() # independent even in their death 4 >>> x.unyield(-4) # can always cons up values >>> y = x.copy() >>> x.unyield(-3) >>> y.unyield(0) # consing is independant >>> list(x) [-3, -4] >>> list(y) [0, -4] """ __slots__ = ['series'] def __init__(self, iterable): """Wrap a given iterable/series.""" if isinstance(iterable, SeriesIter): # Avoid needless indirection self.series = iterable.series else: self.series = Series(iterable) def next(self): """Return head, advance to the tail.""" head = self.series.head # `StopIteration` can happen here self.series = self.series.tail return head def copy(self): """Return a SeriesIter with the same series.""" return SeriesIter(self.series) __copy__ = copy # in case somebody uses the copy module on us... def unyield(self, value): """Series `value` in from of the series.""" self.series = Series(value, self.series) def __iter__(self): return self def __series__(self): return self.series def __nonzero__(self): """Forces computation of head to check if there is any. >>> bool(SeriesIter(iter(lambda:1, None))) True >>> bool(SeriesIter([])) False """ return bool(self.series) def __repr__(self): return "iter(%r)" % (self.series,) class _TestExample(Series): """ Test subclassing. The added method should work both on the subclass and the >>> t = _TestExample(range(3)) >>> t[1:].quux() True >>> t.force() >>> t[1:].quux() False """ def quux(self): return self.lazy def _isprivate(prefix, name): """Avoid Series -> LazySeries -> Series recursion.""" # Should be doable with only one of them but doctest somewhy goes into # infinite recursion... return name in ('_SeriesClass', '_LazySeriesClass') def _test(): import doctest doctest.testmod(isprivate=_isprivate) if __name__ == "__main__": import doctest doctest.master = None # hack for clean repeated exec in emacs _test()