"""Lazy series abstraction tightly integrated with iterators.
This module exposes the duality between linked lists and iterators:
- Simple linked lists have the drawback that their tail must be known and
finite. This can be fixed by overloading head/tail access and making it
lazy; the resulting facility is popular in functional programming languages.
This is implemented by the `Series` classes.
- Iterators have the drawback of being stateful (advancing them is
destructive) but there is no way to capture the "remaining future" of an
iterator at some moment. This can be fixed by making an iterator explicitly
point to a series and allow separate iterators to be forked from it. This
is implemented by the `SeriesIter` class.
Another useful thing about linked lists is that you can non-destructively cons
items in front of them. This is implemented by the `Series` constructor (which is
also used to represent the part of series that has already been computed) and
is also accessible through the `SeriesIter.unyield()` method.
A ``__series__()`` protocol is defined, paralleling ``__iter__()``. Objects
can implement it to provide a special implementation of series over them (but
normally you would simply implement ``__iter__()`` and `Series` will know to
wrap it). On series objects this method returns self.
A convenience subscripting interface is provided for series.
:See also:
- The `Streams chapter of SICP`__ shows how streams with a lazy tail can be
a very powerful abstraction.
__ http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-24.html#%_sec_3.5
- `SRFI 40: A Library of Streams`__ specifies streams whose head is lazy
too, which is more elegant and predictable. It's also a better parallel
for iterators. This is the flavour implemented by this module.
__ http://srfi.schemers.org/srfi-40/srfi-40.html
- `Appendix A of Common Lisp the Language: Series`__ specifies a very
extensive set of operations on lazy sequences. It does *not* implement
them as linked lists and does not make it easy to take references into
the middle of a series; instead it statically optimizes code involving
series into very efficient loops. `Appendix B: Generators and
Gatherers`__, describes a facility very similar to Python's iterators and
their inverse (send multiple values to object, get summary value in the
end). It implements them as a thin layer upon the Series facility, which
is similar to what this module does with `SeriesIter`.
__ http://www.supelec.fr/docs/cltl/clm/node347.html
__ http://www.supelec.fr/docs/cltl/clm/node362.html
The name "streams" seems to be more widely known among functional
programmers and the "series" facility is somewhat different from what we
build here. Nevertheless, I chose "series" for this module since it has no
other accepted meanings while "streams" is overloaded with associations,
especially I/O...
"""
# Module metadata.  NOTE(review): the author/license strings carry a trailing
# space as published -- left untouched since they are runtime values.
__author__ = "Beni Cherniavsky "
__license__ = "LGPL "
__version__ = "0.2+"
__url__ = 'http://cben-hacks.sf.net/python/'
# The order of definitions here is a bit bogus, because of the complex
# dependencies between the [meta]classes...
class _SeriesMetaclass(type):
    """Automatically create a corresponding lazy series class.

    Series objects can be in two states: computed (a cons cell) or not (a
    promise for a cons cell).  For efficiency and elegance(?), the state is
    tracked simply by changing the __class__ of the object.  Simply
    transmuting to `Series` would be unfriendly to subclasses of `Series` -
    their identity would be lost.

    Instead, lazy series are implemented in a `_LazyMixin` which is
    automagically combined with every subclass of `Series` to create a
    corresponding lazy class and connect the pair of classes to each other.
    The additional methods of the subclass will also work in the lazy class."""

    def __init__(cls, name, bases, attrs):
        # `cls` is the freshly created series class being initialized.
        super(_SeriesMetaclass, cls).__init__(name, bases, attrs)
        # Leave alone lazy series classes, to prevent loops.
        if _LazyMixin in bases:
            return
        # Build the lazy twin: `_LazyMixin` first so its `head`/`tail`
        # properties and `__new__` win over the computed class's slots.
        class LazySeries(_LazyMixin, cls):
            """A series that has not been computed yet.

            `head` is computed lazily, raising `StopIteration` for an empty
            series.  `tail` is another lazy series, accessing it implies
            computing `head` (but does not compute the tail's head)."""
            # Transmute to this when computing.
            _SeriesClass = cls
            __slots__ = []
            # Optimization hack: We don't want to waste space in the
            # `LazySeries` object for the iterator (which will not be needed
            # as soon as it becomes a `Series`).  So we stuff it into the
            # `tail` slot.  Our tail property masks it anyway, so we expose
            # the `tail` slot as `_iterator` (both names alias the same slot
            # descriptor).  Long live hardlinks!
            _iterator = cls.tail
        LazySeries.__name__ = 'Lazy' + name  # ???
        # Cross-link the pair so each class can transmute to the other.
        cls._LazySeriesClass = LazySeries
class _LazyMixin(object):
    """This almost implements a lazy `Series` subclass.

    `_SeriesMetaclass` combines this properly with series classes (including
    `Series` itself) to create lazy classes."""
    __slots__ = []

    def __new__(cls, iterable):
        """Wrap a given `iterable` if it's not a series already.

        If `iterable` provides ``__series__()``, its result is returned
        directly; otherwise a new lazy wrapper around ``iter(iterable)``
        is created."""
        try:
            # Must not have multiple `LazySeries`s for same iterator.
            # @@@ BUG: can't re-wrap with a subclass.
            return iterable.__series__()
        except AttributeError:
            s = object.__new__(cls)
            # `_iterator` aliases the `tail` slot (see the metaclass hack).
            s._iterator = iter(iterable)
            return s

    # A fully lazy series wrapping an iterator can pass through 3 phases:
    #
    # 1. iterator has yet to compute `head`.
    # 2. `head` computed, iterator has yet to compute `tail`.
    # 3. `head` and `tail` known.
    #
    # Allowing phase 2 gives at most O(1) savings at the end of the series but
    # incurs O(n) overhead during its construction and/or traversal.
    # Therefore we directly jump from phase 1 to 3.  This is not user-visible
    # in any case.
    #
    # Phase 3 is realized by transmuting the object from a `LazySeries` to a
    # `Series`.

    def head(self):
        # Grab the iterator before `self.tail` is overwritten below (they
        # share a slot).
        iterator = self._iterator
        head = iterator.next()
        # Transmutation hack.  Must be done *after* we know `StopIteration`
        # wasn't raised.  @@@ Unfriendly to subclasses!
        self.__class__ = self._SeriesClass
        # After transmutation `head`/`tail` are plain slots, not properties.
        self.head = head
        self.tail = self._LazySeriesClass(iterator)
        return self.head
    head = property(head)

    def tail(self):
        self.head  # force `head` to be known.
        # The line above transmuted `self`; this re-reads the now-plain slot.
        return self.tail
    tail = property(tail)

    # Lazy classes report True; `Series` overrides this with False.
    lazy = True

    def __nonzero__(self):
        """Forces computation of head to check if there is any.

        >>> bool(Series(iter(lambda:1, None)))
        True
        >>> bool(Series([]))
        False
        """
        try:
            self.head
            return True
        except StopIteration:
            return False

    def __repr__(self):
        return "<%s(%r) at 0x%x>" % (self._SeriesClass.__name__,
                                     self._iterator, id(self))
class Series(object):
    """An immutable possibly lazy linked list, with `head`, `tail` attributes.

    The constructor of this class creates either a `Series` or a `LazySeries`
    depending on whether you give it one or two arguments.  Since `LazySeries`
    objects turn into `Series` objects with time, you are not supposed to
    worry about the distinction - both are just a `Series`.  Note that a
    `Series` can still be infinite because somewhere down the road it can have
    a lazy tail.

    >>> s = Series(iter(range(4)))
    >>> s.head
    0
    >>> s.tail.head
    1
    >>> s.tail.tail.tail.tail.head
    Traceback (most recent call last):
    ...
    StopIteration
    >>> s.tail.tail.head # Can go back as much we want.
    2
    >>> list(s)
    [0, 1, 2, 3]
    >>> a = range(3)
    >>> b = Series(-1, a)
    >>> c = Series(-10, a)
    >>> list(b)
    [-1, 0, 1, 2]
    >>> list(c)
    [-10, 0, 1, 2]
    """
    # The metaclass builds the lazy twin and sets `_LazySeriesClass` on us.
    __metaclass__ = _SeriesMetaclass
    __slots__ = ['head', 'tail']

    def __new__(cls, *args):
        """Series([head,] iterable) -> Series of [`head` preceding] `iterable`."""
        try:
            # Two arguments: cons `head` in front of a lazy `iterable`.
            (head, iterable) = args
            self = object.__new__(cls)
            self.head = head
            self.tail = cls._LazySeriesClass(iterable)
            return self
        except ValueError:
            # One argument: return the lazy wrapper itself.
            (iterable,) = args
            return cls._LazySeriesClass(iterable)

    def __iter__(self):
        return SeriesIter(self)

    def __series__(self):
        """This method is attempted before wrapping a `Series`."""
        return self

    def __getitem__(self, key):
        """A convenience subscripting interface.

        This replaces an infinite set of convenience interfaces to get the
        head and tail in a tuple, etc.

        A number key gives a specific value or raises `IndexError`.  A finite
        slice gives a list of values, possibly empty.  An infinite slice
        (``[start:]``) gives a tail of the series, possibly empty.  Steps are
        supported.  Negative indexes/steps are not supported.

        A tuple of keys gives a tuple of the values you would get for each
        item.

        >>> s = Series(range(4))
        >>> s[2]
        2
        >>> s[1:3] == range(1, 3)
        True
        >>> list(s[2:]) == range(2, 4)
        True
        >>> s[1, 2:] == (s[1], s[2:])
        True
        >>> s[2:, 1, :] == (s[2:], s[1], s)
        True
        """
        # This method was rewritten by me many times.  I tried to optimize
        # indexing by tuple so that the list wouldn't have to be traversed
        # many times by caching distant tails between the tuple's items; the
        # complexity of this was so high that it's probably not worth it.
        # Typical use cases for tuple indexing will use small indexes anyway,
        # e.g. ``s[0, 1:]``.
        if isinstance(key, tuple):
            return tuple(map(self.__getitem__, key))
        if isinstance(key, int):
            try:
                return self[key:].head
            except StopIteration:
                # Walked off the end of the series.
                raise IndexError
        try:
            start = key.start or 0
            stop = key.stop
            step = key.step or 1
        except AttributeError:
            raise TypeError("Key must be an int, slice or tuple.")
        if start < 0 or (stop and stop < 0) or step <= 0:
            raise TypeError("Negative indexes/steps not supported.")
        if stop is None and step == 1:
            # Plain tail (``s[start:]``): walk `start` links, stopping
            # early (returning an empty series) if the series ends first.
            for _ in range(start):
                try:
                    self = self.tail
                except StopIteration:
                    break
            return self
        from itertools import islice
        res = islice(self, start, stop, step)
        if stop is not None:
            # Finite slice: materialize as a plain list.
            return list(res)
        # Infinite slice with a step: wrap the islice back into a series.
        return Series(res)

    # Computed series; the lazy twin class overrides this with True.
    lazy = False

    def force(self, count=None):
        """Force immediate computation of `count` values, all by default.

        This only has effect on lazy `LazySeries` objects and the effect is
        only on the moment of computation.  If that can change the results,
        perhaps this module is the wrong tool for you.  Calling with no
        `count` on infinite series will enter an infinite loop!

        >>> s = Series(range(4))
        >>> s.lazy
        True
        >>> s.force(2)
        >>> s[1:].lazy # computed up to here
        False
        >>> s[2:].lazy # lazy boundary moved by 2
        True
        >>> s.force() # stops on StopIteration
        >>> s[3:].lazy
        False
        """
        try:
            if count is None:  # @@@ awkward duplication
                while True:
                    # Each `.tail` access forces one head computation.
                    self = self.tail
            else:
                for _ in range(count):
                    self = self.tail
        except StopIteration:
            # End of series reached -- everything is computed now.
            pass

    def __nonzero__(self):
        """Checks whether the series is empty.

        Hint: at least `n` elements exist iff ``self[n-1:]`` is non-empty."""
        # A computed `Series` is a cons cell, hence never empty; the lazy
        # class overrides this to force the head first.
        return True

    def __repr__(self):
        return "<%s(%r, ...)>" % (self.__class__.__name__, self.head)
class SeriesIter(object):
    """A copyable iterator over a series.

    The `series` attribute always points at the not-yet-consumed remainder
    of the series and moves one cons cell forward on every `.next()` call.
    Because the underlying series are immutable, no `SeriesIter` can ever
    disturb another one -- forked iterators advance completely independently.

    >>> x = SeriesIter(range(5))
    >>> x.next()
    0
    >>> y = x.copy() # take a snapshot
    >>> [y.next(), y.next()]
    [1, 2]
    >>> [x.next(), x.next(), x.next()] # x and y are independent
    [1, 2, 3]
    >>> list(y.copy()) # non-destructive
    [3, 4]
    >>> list(y) # destructive
    [3, 4]
    >>> y.next()
    Traceback (most recent call last):
    ...
    StopIteration
    >>> x.next() # independent even in their death
    4
    >>> x.unyield(-4) # can always cons up values
    >>> y = x.copy()
    >>> x.unyield(-3)
    >>> y.unyield(0) # consing is independant
    >>> list(x)
    [-3, -4]
    >>> list(y)
    [0, -4]
    """
    __slots__ = ['series']

    def __init__(self, iterable):
        """Wrap an iterable, a series, or another `SeriesIter`."""
        # Point straight at another iterator's series instead of wrapping
        # the iterator object itself -- avoids a needless indirection layer.
        self.series = (iterable.series if isinstance(iterable, SeriesIter)
                       else Series(iterable))

    def next(self):
        """Return the current head and step to the tail."""
        value = self.series.head  # may raise StopIteration at series end
        self.series = self.series.tail
        return value

    def copy(self):
        """Fork an independent `SeriesIter` positioned at the same spot."""
        return SeriesIter(self.series)
    __copy__ = copy  # cooperate with the `copy` module

    def unyield(self, value):
        """Cons `value` in front of the remaining series."""
        self.series = Series(value, self.series)

    def __iter__(self):
        return self

    def __series__(self):
        """Expose the remaining series (the ``__series__()`` protocol)."""
        return self.series

    def __nonzero__(self):
        """Forces computation of head to check if there is any.

        >>> bool(SeriesIter(iter(lambda:1, None)))
        True
        >>> bool(SeriesIter([]))
        False
        """
        return bool(self.series)

    def __repr__(self):
        return "iter(%r)" % (self.series,)
class _TestExample(Series):
    """
    Test subclassing.  The added method should work both on the subclass and
    on its automatically generated lazy counterpart.

    >>> t = _TestExample(range(3))
    >>> t[1:].quux()
    True
    >>> t.force()
    >>> t[1:].quux()
    False
    """
    def quux(self):
        # `lazy` is True on the lazy twin class, False once computed.
        return self.lazy
def _isprivate(prefix, name):
"""Avoid Series -> LazySeries -> Series recursion."""
# Should be doable with only one of them but doctest somewhy goes into
# infinite recursion...
return name in ('_SeriesClass', '_LazySeriesClass')
def _test():
    """Run this module's doctests, hiding the class cross-links."""
    import doctest
    # NOTE(review): the `isprivate` keyword was deprecated in Python 2.4
    # and removed in later versions -- confirm the targeted doctest version.
    doctest.testmod(isprivate=_isprivate)
if __name__ == "__main__":
import doctest
doctest.master = None # hack for clean repeated exec in emacs
_test()