"""A dictionary extended with set operations. It has long been observed that sets can be emulated as dictionaries with dummy values (see `dict.fromkeys()`). The common operations, like `dict.update()` match directly. This module is simply a ''backport'' of the other goodies added in the ``sets`` module to dictionaries. The main issue with applying set operations are collisions: given two values (which can be different) for the same key in two DSets, which value should we use? Choosing either one would be wrong in most applications, so the user is allowed to provide his own function for reconciling them. He has to provide them per call - experience showed that the desired operation chages frequently even when invoking the same method. Values do not participate in the comparison of elements, to maintain compatibility with sets. If you want them to participate, make them part of the key. This means that generally, the operations don't have to be commutative (unlike with sets). To emphasize this, the __magic__ operator overloading methods are not provided - only the named methods are (they would also present a problem of how to indicate the collision handler). Other objects that are not mappings (e.g. lists or sets) are treated as mappings from their items to `None`. """ __author__ = "Beni Cherniavsky " __license__ = "LGPL " __url__ = 'http://cben-hacks.sf.net/python/' class UnhandledCollisionError(Exception): """Raised by default when keys in two DSets collide.""" pass def collision_unhandler(this, other): """Raise an `UnhandledCollisionError` upon a collision.""" raise UnhandledCollisionError class DSet(dict): """An extension of a dictionary with some of the methods of `sets.Set`. Most methods take an optional last argument, `collision`, that should be a function of two arguments that is called upon key collision, when the key should also be present in the result, so that it's unclear which value to use. Note that no checks whether the values are equal is made (think about a collision hadnler that sums the values to understand why). If the argument is not provided, an exception is raised upon a collision. """ # Redudant - `dict.fromkeys` does this just as well ## def fromkeys(cls, iterable, value=None): ## """New DSet with keys from `iterable` and values equal to `value`. ## ## >>> DSet.fromkeys([1]) == {1: None} ## True ## >>> DSet.fromkeys(range(3), 3) == {0: 3, 1: 3, 2: 3} ## True ## """ ## return cls(dict.fromkeys(iterable, value)) ## fromkeys = classmethod(fromkeys) def add(self, key, value=None, collision=collision_unhandler): """Add a key to a DSet. >>> d = DSet() >>> d.add(1) >>> d.add(2, 3) >>> d == {1: None, 2: 3} True >>> d.add(2, 4, collision=second) >>> d == {1: None, 2: 4} True """ try: old = self[key] except KeyError: self[key] = value else: self[key] = (collision or self.collision)(old, value) def difference(self, other): """Return new DSet with all items in `self` but not in `other`. >>> DSet({1: 2, 3: 4}).difference({1: 0}) == {3: 4} True """ res = self.__class__() for k, v in self.iteritems(): if k not in other: res[k] = v return res def difference_update(self, other): """Remove all keys in `other` from this dictset. >>> d = DSet({1: 2, 3: 4}) >>> d.difference_update([3, 5]) >>> d == {1: 2} True """ for k in other: self.discard(k) def discard(self, key): """Remove a key from a set if it is a member. >>> d = DSet({1: 2, 3: 4}) >>> d.discard(3) >>> d.discard(5) >>> d == {1: 2} True """ if key in self: del self[key] def intersection(self, other, collision=collision_unhandler): """Return new DSet with all items of `self` that are also in `other`. >>> DSet({1: 2, 3: 4}).intersection({1: 0}, collision=first) == {1: 2} True """ res = self.__class__() for k, v in _iteritems(other): if k in self: res[k] = collision(self[k], v) return res def intersection_update(self, other, collision=collision_unhandler): """Retain only items whose keys are in `other`. >>> d = DSet({1: 2, 3: 4}) >>> d.intersection_update([3, 5], collision=first) >>> d == {3: 4} True """ res = self.intersection(other, collision) self.clear() self.update(res) def issubset(self, other): """Check whether `other` contains all keys of `self`. >>> DSet({1: 2, 3: 4}).issubset({1: 0, 2: 0, 3: 0}) True >>> DSet({1: 2, 3: 4}).issubset({1: 0}) False """ for k in self: if k not in other: return False return True def issuperset(self, other): """Check whether `self` contains all keys of `other`. >>> DSet({1: 2, 3: 4}).issuperset({1: 0, 2: 0, 3: 0}) False >>> DSet({1: 2, 3: 4}).issuperset({1: 0}) True """ for k in other: if k not in self: return False return True def pop(self): """Remove and return an arbitrary key. >>> d = DSet({1: 2, 3: 4}) >>> keys = [d.pop(), d.pop()] >>> keys.sort() >>> keys [1, 3] >>> d == {} True """ return self.popitem()[0] remove = dict.__delitem__ def symmetric_difference(self, other): """Return new DSet with items from `self` or `other` but not both. >>> DSet({1: 2, 3: 4}).symmetric_difference({1: 0, 5: 6}) == {3: 4, 5: 6} True >>> DSet({1: 2, 3: 4}).symmetric_difference([1, 5]) == {3: 4, 5: None} True """ res = self.difference(other) for k, v in _iteritems(other): if k not in self: res[k] = v return res def symmetric_difference_update(self, other): """Make self into the symmetric difference of `self` and `other`. >>> s = DSet({1: 2, 3: 4}) >>> s.symmetric_difference_update([1, 5]) >>> s == {3: 4, 5: None} True """ for k, v in _iteritems(other): if k in self: del self[k] else: self[k] = v def union(self, other, collision=collision_unhandler): """Return new DSet with all items from `self` and from `other`. >>> DSet({1: 2, 3: 4}).union({3: 0, 5: 6}, collision=first) == {1: 2, 3: 4, 5: 6} True """ res = self.__class__(self) res.union_update(other, collision) return res def union_update(self, other, collision=collision_unhandler): """Add all itmes of `other` that are not in `self`. Note that when a key is present in both, this method prefers the values of `self`, unlike `update()`. >>> s = DSet({1: 2, 3: 4}) >>> s.union_update({3: 0, 5: 6}, collision=first) >>> s == {1: 2, 3: 4, 5: 6} True """ for k, v in _iteritems(other): if k in self: self[k] = collision(self[k], v) else: self[k] = v # Override `dict`'s unfriendliness to subclassing def copy(self): """Return a shallow copy.""" return DSet(self) __copy__ = copy # for the `copy` module # Bells & whistles, debugging def __repr__(self): return '%s(%s)' % (self.__class__.__name__, dict.__repr__(self)) def first(a, b): """Resolve collisions by returning the first value.""" return a def second(a, b): """Resolve collisions by returning the second value.""" return b def _iteritems(s): """Generate/return items of `s`, using `None` for values if needed. >>> dict(_iteritems({1: 2, 3: 4})) == {1: 2, 3: 4} True >>> list(_iteritems([1, 2])) [(1, None), (2, None)] """ try: return s.iteritems() except: try: return s.items() except: def gen(): for k in s: yield k, None return gen() def _test(): import doctest doctest.master = None # hack for clean repeated exec in emacs return doctest.testmod() if __name__ == "__main__": _test()