Skip to content

Instantly share code, notes, and snippets.

@dsc
Last active August 11, 2020 15:53
Show Gist options
  • Save dsc/0eedbe52b6ffd8030b80f7b92e4e1e2b to your computer and use it in GitHub Desktop.
Save dsc/0eedbe52b6ffd8030b80f7b92e4e1e2b to your computer and use it in GitHub Desktop.
Random Python tools (written for Python 2, because lazy)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Chunking utilities -- Functions to group an iterable into pieces or "chunks".
"""
### String Chunking Utils
def chunks(it, sep, fileAsBytes=True, asFile=None):
    """Divide a stream into iterable groups by either a size or a delimiter.

    `it`: the iterable (or file object) to split.
    `sep`: an ``int`` selects fixed-size grouping via `chunksBySize`; any
        other value is treated as a delimiter for `chunksByDelimiter`.
    `fileAsBytes`: when true and `it` is treated as a file, read it one
        ``read(1)`` unit at a time (via `chunkFileBytes`) instead of using
        the file's own iteration.
    `asFile`: force (``True``) or suppress (``False``) the file treatment;
        ``None`` auto-detects file objects.

    Returns an iterator of chunk iterators.
    """
    if asFile is None:
        import io
        try:
            # Python 2: the built-in ``file`` type, plus io-based file objects.
            file_types = (file, io.IOBase)
        except NameError:
            # Python 3: ``file`` no longer exists; every file derives from IOBase.
            file_types = (io.IOBase,)
        asFile = isinstance(it, file_types)
    if asFile and fileAsBytes:
        it = chunkFileBytes(it)
    splitter = chunksBySize if isinstance(sep, int) else chunksByDelimiter
    return splitter(it, sep)
def chunkFileBytes(f):
    """Yield the contents of file-like `f` one ``read(1)`` result at a time.

    `f`: any object with a ``read(size)`` method.

    Iteration ends at the first falsy read result (end of file).
    """
    while True:
        c = f.read(1)
        if not c:
            # A plain return ends the generator; raising StopIteration here
            # is turned into RuntimeError by PEP 479 on modern Python.
            return
        yield c
def chunksBySize(it, size):
    """Lazily split `it` into consecutive chunks of at most `size` elements.

    `it`: any iterable.
    `size`: maximum number of elements per chunk; values below 1 behave
        like 1 (every chunk holds exactly one element).

    Yields one iterator per chunk. All chunks draw from the same underlying
    iterator, so consume each chunk before advancing to the next one; an
    unconsumed chunk leaves its remaining elements to be picked up by the
    following chunks. The final chunk may be shorter than `size`.
    """
    from itertools import chain, islice

    it = iter(it)
    extra = max(size - 1, 0)  # elements to pull after the chunk's head
    for head in it:
        # Each chunk owns its head element (no shared closure variable), so
        # chunks collected before being consumed still start correctly.
        yield chain((head,), islice(it, extra))
def chunksByDelimiter(it, delimiter, yield_delimiter=True):
    """Lazily split `it` into chunks, each ending at an element equal to `delimiter`.

    `it`: any iterable.
    `delimiter`: the element value that terminates a chunk.
    `yield_delimiter`: when true (the default) the terminating delimiter is
        emitted as the last element of its chunk; when false it is dropped,
        so two adjacent delimiters produce an empty chunk.

    Yields one iterator per chunk. All chunks draw from the same underlying
    iterator, so consume each chunk before advancing to the next one. The
    final chunk ends at the end of input if no delimiter follows.
    """
    it = iter(it)

    def chunker(first):
        # `first` is passed in explicitly so every chunk keeps its own head
        # element rather than sharing a late-bound closure variable.
        if first != delimiter or yield_delimiter:
            yield first
        if first == delimiter:
            return
        for item in it:
            if item == delimiter:
                if yield_delimiter:
                    yield item
                return
            yield item

    for head in it:
        yield chunker(head)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" A few simple operations on collections.
"""
from collections import Iterable, Mapping, Set, Sequence
def iteritems(it, index=False):
    """Yield (key, value) pairs for `it`, choosing the strategy by its type.

    Strategy, in order of precedence:
      * objects exposing ``iteritems()``: whatever that method produces;
      * Mappings: ``(key, it[key])`` for each key;
      * Sequences, only when `index` is true: ``(it.index(value), value)``,
        i.e. the position of the first element equal to each value;
      * any other Iterable: ``enumerate(it)`` pairs.

    Raises TypeError (on first iteration, since this is a generator) when
    `it` matches none of the above.
    """
    if hasattr(it, 'iteritems'):
        pairs = it.iteritems()
    elif isinstance(it, Mapping):
        pairs = ((key, it[key]) for key in it)
    elif index and isinstance(it, Sequence):
        pairs = ((it.index(value), value) for value in it)
    elif isinstance(it, Iterable):
        pairs = enumerate(it)
    else:
        raise TypeError("Iterable %s is not iterable!" % type(it).__name__)
    for pair in pairs:
        yield pair
def items(*cs, **kw):
    """Chain the elements of every non-empty collection in `cs`, then `kw`.

    Mappings contribute their (key, value) pairs via `iteritems`; any other
    collection contributes its elements as-is. Falsy arguments (``None``,
    empty collections, an empty `kw`) are skipped.

    Returns an ``itertools.chain`` over all contributions, in argument order.
    """
    sources = []
    for collection in cs + (kw,):
        if not collection:
            continue
        if isinstance(collection, Mapping):
            sources.append(iteritems(collection))
        else:
            sources.append(iter(collection))
    return chain(*sources)
def merge(*cs, **kw):
    """Combine all collections in `cs` and `kw` into one new collection.

    The result has the type of the first positional argument; when there
    are no positional arguments, or the first one is ``None``, it has the
    type of `kw` (a dict). The new collection is built by calling that type
    on the chained contents produced by `items`.
    """
    template = kw
    if cs and cs[0] is not None:
        template = cs[0]
    factory = type(template)
    return factory(items(*cs, **kw))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__all__ = ('walk',)
from itertools import chain, repeat
from collections import Iterable, Mapping, Set, Sequence
def I(x, *args, **kwargs):
    """Identity function: return `x` unchanged, ignoring all other arguments."""
    return x
class walk(object):
    """ Walk a tree of iterables depth-first, emitting its leaf elements.

        `it`: the root iterable.
        `containers`: a tuple of types which trigger recursion. Defaults to:
            (list, tuple, Mapping, Set).

        Iterating a `walk` yields every element that is not an instance of
        `containers`. Elements that are containers are descended into instead
        of being emitted. For a Mapping, a value that is not a container is
        emitted as a ``(key, value)`` pair; a value that is a container is
        descended into and its key is not emitted.
    """

    def __init__(self, it, containers=(list, tuple, Mapping, Set)):
        self.it = it
        self.containers = containers

    def __iter__(self):
        # The previous body referenced names that do not exist in this scope
        # (`it`, `fn`, `new`, `containers`), so iteration always failed.
        return self._walk(self.it)

    def _walk(self, node):
        """Generate the leaves beneath `node`, recursing into containers."""
        if isinstance(node, Mapping):
            for key in node:
                value = node[key]
                if isinstance(value, self.containers):
                    for leaf in self._walk(value):
                        yield leaf
                else:
                    yield (key, value)
        else:
            for element in node:
                if isinstance(element, self.containers):
                    for leaf in self._walk(element):
                        yield leaf
                else:
                    yield element
class Walker(walk):
    """ A Walker is intended to walk a tree of iterables, invoking
        `leaf(val, key)` for each leaf and handling each branch, with keys
        being indices via `enumerate` for non-Mapping types.

        NOTE(review): the traversal itself (`map`) is not implemented in this
        class, and no `branch` hook is defined here; only `leaf` and
        `newBranch` exist. Subclasses must override `map` before a Walker can
        be iterated.

        `containers`: a tuple of types which trigger recursion. Defaults to:
            (list, tuple, Mapping, Set).
    """

    # Class-level defaults; an instance overrides them only when asked to.
    containers = (list, tuple, Mapping, Set,)
    it = None

    def __init__(self, it=None, containers=None):
        """Store the root iterable and, optionally, custom container types.

        `it`: the root iterable to walk.
        `containers`: tuple of types that trigger recursion; ``None`` keeps
            the class-level default.
        """
        # The previous body read an undefined name `containers`, so creating
        # a Walker always raised NameError; it is now an optional parameter.
        if containers is not None:
            self.containers = containers
        self.it = it

    def newBranch(self, branch):
        """Return a new, empty container of the same type as `branch`."""
        return type(branch)()

    def leaf(self, val, key):
        """Hook for a leaf value; the default returns `val` unchanged."""
        return val

    def __iter__(self):
        """Iterate over whatever `map` produces for the stored root."""
        result = self.map(self.it)
        if result is None:
            # Same exception type iter() would raise for a None result, but
            # with a message that points at the actual cause.
            raise TypeError(
                "%s.map() returned None; override map() to return an iterable"
                % type(self).__name__)
        return iter(result)

    def map(self, it):
        """Traversal hook. Unimplemented here: returns ``None``."""
        pass
def walkmap(fn, it, new=None, containers=(list, tuple, Mapping, Set)):
    """ Recursively apply `fn` to every element of a possibly nested iterable
        `it`, returning a structure of the same shape.

        Elements that are instances of `containers` are mapped recursively
        rather than passed to `fn`. Mappings present their elements as
        (key, value) pairs (obtained through `iteritems`); a container value
        is mapped first and `fn` then receives ``(key, mapped_value)``.

        `new`: a callable new(iterable, data) -> new_iterable invoked
            when a new instance of a mapped iterable is needed. Defaults to
            calling the iterable's own type on the mapped data.
        `containers`: a tuple of types which trigger recursion. Defaults to:
            (list, tuple, Mapping, Set).
    """
    if new is None:
        def new(container, contents):
            return type(container)(contents)

    def visit(element):
        # Containers are rebuilt recursively; everything else goes to fn.
        if isinstance(element, containers):
            return walkmap(fn, element, new, containers)
        return fn(element)

    def visit_pair(pair):
        # Mapping entry: map a container value first, then hand fn the pair.
        key, value = pair
        if isinstance(value, containers):
            value = walkmap(fn, value, new, containers)
        return fn((key, value))

    if isinstance(it, Mapping):
        mapped = (visit_pair(entry) for entry in iteritems(it))
    else:
        mapped = (visit(element) for element in iter(it))
    return new(it, mapped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment