Add local version of the counter class
This commit is contained in:
190
src/obitools/collections.py
Normal file
190
src/obitools/collections.py
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
from operator import itemgetter
|
||||||
|
from heapq import nlargest
|
||||||
|
from itertools import repeat, ifilter
|
||||||
|
|
||||||
|
class Counter(dict):
|
||||||
|
'''Dict subclass for counting hashable objects. Sometimes called a bag
|
||||||
|
or multiset. Elements are stored as dictionary keys and their counts
|
||||||
|
are stored as dictionary values.
|
||||||
|
|
||||||
|
>>> Counter('zyzygy')
|
||||||
|
Counter({'y': 3, 'z': 2, 'g': 1})
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, iterable=None, **kwds):
|
||||||
|
'''Create a new, empty Counter object. And if given, count elements
|
||||||
|
from an input iterable. Or, initialize the count from another mapping
|
||||||
|
of elements to their counts.
|
||||||
|
|
||||||
|
>>> c = Counter() # a new, empty counter
|
||||||
|
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||||
|
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
|
||||||
|
>>> c = Counter(a=4, b=2) # a new counter from keyword args
|
||||||
|
|
||||||
|
'''
|
||||||
|
self.update(iterable, **kwds)
|
||||||
|
|
||||||
|
def __missing__(self, key):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def most_common(self, n=None):
|
||||||
|
'''List the n most common elements and their counts from the most
|
||||||
|
common to the least. If n is None, then list all element counts.
|
||||||
|
|
||||||
|
>>> Counter('abracadabra').most_common(3)
|
||||||
|
[('a', 5), ('r', 2), ('b', 2)]
|
||||||
|
|
||||||
|
'''
|
||||||
|
if n is None:
|
||||||
|
return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
|
||||||
|
return nlargest(n, self.iteritems(), key=itemgetter(1))
|
||||||
|
|
||||||
|
def elements(self):
|
||||||
|
'''Iterator over elements repeating each as many times as its count.
|
||||||
|
|
||||||
|
>>> c = Counter('ABCABC')
|
||||||
|
>>> sorted(c.elements())
|
||||||
|
['A', 'A', 'B', 'B', 'C', 'C']
|
||||||
|
|
||||||
|
If an element's count has been set to zero or is a negative number,
|
||||||
|
elements() will ignore it.
|
||||||
|
|
||||||
|
'''
|
||||||
|
for elem, count in self.iteritems():
|
||||||
|
for _ in repeat(None, count):
|
||||||
|
yield elem
|
||||||
|
|
||||||
|
# Override dict methods where the meaning changes for Counter objects.
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fromkeys(cls, iterable, v=None):
|
||||||
|
raise NotImplementedError(
|
||||||
|
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
|
||||||
|
|
||||||
|
def update(self, iterable=None, **kwds):
|
||||||
|
'''Like dict.update() but add counts instead of replacing them.
|
||||||
|
|
||||||
|
Source can be an iterable, a dictionary, or another Counter instance.
|
||||||
|
|
||||||
|
>>> c = Counter('which')
|
||||||
|
>>> c.update('witch') # add elements from another iterable
|
||||||
|
>>> d = Counter('watch')
|
||||||
|
>>> c.update(d) # add elements from another counter
|
||||||
|
>>> c['h'] # four 'h' in which, witch, and watch
|
||||||
|
4
|
||||||
|
|
||||||
|
'''
|
||||||
|
if iterable is not None:
|
||||||
|
if hasattr(iterable, 'iteritems'):
|
||||||
|
if self:
|
||||||
|
self_get = self.get
|
||||||
|
for elem, count in iterable.iteritems():
|
||||||
|
self[elem] = self_get(elem, 0) + count
|
||||||
|
else:
|
||||||
|
dict.update(self, iterable) # fast path when counter is empty
|
||||||
|
else:
|
||||||
|
self_get = self.get
|
||||||
|
for elem in iterable:
|
||||||
|
self[elem] = self_get(elem, 0) + 1
|
||||||
|
if kwds:
|
||||||
|
self.update(kwds)
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
'Like dict.copy() but returns a Counter instance instead of a dict.'
|
||||||
|
return Counter(self)
|
||||||
|
|
||||||
|
def __delitem__(self, elem):
|
||||||
|
'Like dict.__delitem__() but does not raise KeyError for missing values.'
|
||||||
|
if elem in self:
|
||||||
|
dict.__delitem__(self, elem)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
if not self:
|
||||||
|
return '%s()' % self.__class__.__name__
|
||||||
|
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
|
||||||
|
return '%s({%s})' % (self.__class__.__name__, items)
|
||||||
|
|
||||||
|
# Multiset-style mathematical operations discussed in:
|
||||||
|
# Knuth TAOCP Volume II section 4.6.3 exercise 19
|
||||||
|
# and at http://en.wikipedia.org/wiki/Multiset
|
||||||
|
#
|
||||||
|
# Outputs guaranteed to only include positive counts.
|
||||||
|
#
|
||||||
|
# To strip negative and zero counts, add-in an empty counter:
|
||||||
|
# c += Counter()
|
||||||
|
|
||||||
|
def __add__(self, other):
|
||||||
|
'''Add counts from two counters.
|
||||||
|
|
||||||
|
>>> Counter('abbb') + Counter('bcc')
|
||||||
|
Counter({'b': 4, 'c': 2, 'a': 1})
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
if not isinstance(other, Counter):
|
||||||
|
return NotImplemented
|
||||||
|
result = Counter()
|
||||||
|
for elem in set(self) | set(other):
|
||||||
|
newcount = self[elem] + other[elem]
|
||||||
|
if newcount > 0:
|
||||||
|
result[elem] = newcount
|
||||||
|
return result
|
||||||
|
|
||||||
|
def __sub__(self, other):
|
||||||
|
''' Subtract count, but keep only results with positive counts.
|
||||||
|
|
||||||
|
>>> Counter('abbbc') - Counter('bccd')
|
||||||
|
Counter({'b': 2, 'a': 1})
|
||||||
|
|
||||||
|
'''
|
||||||
|
if not isinstance(other, Counter):
|
||||||
|
return NotImplemented
|
||||||
|
result = Counter()
|
||||||
|
for elem in set(self) | set(other):
|
||||||
|
newcount = self[elem] - other[elem]
|
||||||
|
if newcount > 0:
|
||||||
|
result[elem] = newcount
|
||||||
|
return result
|
||||||
|
|
||||||
|
def __or__(self, other):
|
||||||
|
'''Union is the maximum of value in either of the input counters.
|
||||||
|
|
||||||
|
>>> Counter('abbb') | Counter('bcc')
|
||||||
|
Counter({'b': 3, 'c': 2, 'a': 1})
|
||||||
|
|
||||||
|
'''
|
||||||
|
if not isinstance(other, Counter):
|
||||||
|
return NotImplemented
|
||||||
|
_max = max
|
||||||
|
result = Counter()
|
||||||
|
for elem in set(self) | set(other):
|
||||||
|
newcount = _max(self[elem], other[elem])
|
||||||
|
if newcount > 0:
|
||||||
|
result[elem] = newcount
|
||||||
|
return result
|
||||||
|
|
||||||
|
def __and__(self, other):
|
||||||
|
''' Intersection is the minimum of corresponding counts.
|
||||||
|
|
||||||
|
>>> Counter('abbb') & Counter('bcc')
|
||||||
|
Counter({'b': 1})
|
||||||
|
|
||||||
|
'''
|
||||||
|
if not isinstance(other, Counter):
|
||||||
|
return NotImplemented
|
||||||
|
_min = min
|
||||||
|
result = Counter()
|
||||||
|
if len(self) < len(other):
|
||||||
|
self, other = other, self
|
||||||
|
for elem in ifilter(self.__contains__, other):
|
||||||
|
newcount = _min(self[elem], other[elem])
|
||||||
|
if newcount > 0:
|
||||||
|
result[elem] = newcount
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import doctest
|
||||||
|
print doctest.testmod()
|
||||||
|
|
@ -7,7 +7,11 @@ from obitools.ecopcr import EcoPCRDBFile
|
|||||||
from obitools.utils import universalOpen
|
from obitools.utils import universalOpen
|
||||||
from obitools.utils import ColumnFile
|
from obitools.utils import ColumnFile
|
||||||
import math
|
import math
|
||||||
|
try:
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
except ImportError:
|
||||||
|
from obitools.collections import Counter
|
||||||
|
|
||||||
|
|
||||||
class Taxonomy(object):
|
class Taxonomy(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -4,7 +4,11 @@ Created on 31 oct. 2009
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
from random import randrange, sample
|
from random import randrange, sample
|
||||||
from obitools.utils import Counter
|
try:
|
||||||
|
from collections import Counter
|
||||||
|
except ImportError:
|
||||||
|
from obitools.collections import Counter
|
||||||
|
|
||||||
|
|
||||||
def lookfor(x,cumsum):
|
def lookfor(x,cumsum):
|
||||||
lmax=len(cumsum)
|
lmax=len(cumsum)
|
||||||
|
@ -15,7 +15,10 @@ import os.path
|
|||||||
from _utils import FakeFile
|
from _utils import FakeFile
|
||||||
from _utils import progressBar
|
from _utils import progressBar
|
||||||
|
|
||||||
|
try:
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
except ImportError:
|
||||||
|
from obitools.collections import Counter
|
||||||
|
|
||||||
|
|
||||||
class FileFormatError(Exception):
|
class FileFormatError(Exception):
|
||||||
|
Reference in New Issue
Block a user