Source code for grains.utils

# -*- coding: utf-8 -*-

"""
This module provides *general* utility functions used by the **grains** package. The specific
helper functions reside in the proper module. For example, a function that works on a general
list goes here, but a computational geometry algorithm goes to the **geometry** module. The
functions in the **utils** module can be interesting for other projects too, partly because of
their general scope, and partly because of the few dependencies.

Functions
---------
.. autosummary::
    :nosignatures:
    :toctree: functions/

    duplicates
    toggle
    index_list
    flatten_list
    argsorted
    map_inplace
    non_unique
    parse_kwargs
    compress
    decompress
    decompress_inmemory
    neighborhood

"""

import os
import zipfile
from functools import reduce

import numpy as np


[docs]def duplicates(sequence): """Set of duplicate elements in a sequence. Parameters ---------- sequence : sequence types (list, tuple, string, etc.) Sequence possibly containing repeating elements. Returns ------- set Set of unique values. Notes ----- Copied from https://stackoverflow.com/a/9836685/4892892 Examples -------- Note that the order of the elements in the resulting set does not matter. >>> a = [1, 2, 3, 2, 1, 5, 6, 5, 5, 5] # list >>> duplicates(a) {1, 2, 5} >>> a = (1, 1, 0, -1, -1, 0) # tuple >>> duplicates(a) {0, 1, -1} >>> a = 'abbcdkc' # string >>> duplicates(a) {'c', 'b'} """ seen = set() seen_add = seen.add # adds all elements it doesn't know yet to seen and all other to seen_twice seen_twice = set(x for x in sequence if x in seen or seen_add(x)) # turn the set into a list (as requested) return seen_twice
[docs]def toggle(lst): """Return True for False values and False for True values in a list. Parameters ---------- lst : list An arbitrary list, possibly containing other lists. Returns ------- list Element-wise logical not operator applied on the input list. Notes ----- Solution taken from https://stackoverflow.com/a/51122372/4892892. Examples -------- >>> toggle([True, False]) [False, True] >>> toggle(['h', 0, 2.3, -2, 5, []]) [False, True, False, False, False, True] """ return list(map(lambda item: not item, lst))
[docs]def index_list(lst, indices): """Index a list by another list. Parameters ---------- lst : list List to be indexed. indices : list Indices of the original list that will form the new list. Returns ------- list Members of `lst`, selected by `indices`. Examples -------- >>> index_list(['c', ['nested', 'list'], 13], [1, 2]) [['nested', 'list'], 13] """ return [lst[idx] for idx in indices]
[docs]def flatten_list(nested_list): """Merge a list of lists to a single list. Parameters ---------- nested_list : list List containing other lists. Returns ------- list Flattened list. Notes ----- - Only a single level (i.e. list of lists) is handled, see the second example. - Several methods, such as list comprehension, monoid and loops, are proposed in https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists. Here, the list comprehension approach is used. Examples -------- >>> nested_list = [['some'], ['items']] >>> flatten_list(nested_list) ['some', 'items'] >>> multiply_nested_list = [[['item'], 'within', 'item']] >>> flatten_list(multiply_nested_list) [['item'], 'within', 'item'] """ return [item for sublist in nested_list for item in sublist]
[docs]def argsorted(sequence, reverse=False): """Return the indices that would sort a list or a tuple. Implementation is taken from https://stackoverflow.com/a/6979121/4892892. Parameters ---------- sequence : list, tuple Input sequence in which the sorted indices to be found. reverse : bool If set to True, then the elements are sorted as if each comparison was reversed. Returns ------- list List of indices that would sort the input list/tuple. See Also -------- :func:`sorted` :func:`numpy.argsort` Examples -------- >>> argsorted([2, 1.1, 1.1]) [1, 2, 0] >>> argsorted([2, 1.1, 1.1], reverse=True) [0, 1, 2] >>> argsorted(()) [] """ return sorted(range(len(sequence)), key=lambda k: sequence[k], reverse=reverse)
[docs]def map_inplace(function, __iterable): """Apply a function to each member of an iterable in-place. Parameters ---------- function : function object Function to be applied to the entries of the iterable. __iterable : iterable Iterable. Notes ----- Comprehensions or functional tools work on iterators, thereby not modifying the original container (https://stackoverflow.com/a/4148523/4892892). For in-place modification, the conventional for loop approach is used (https://stackoverflow.com/a/4148525/4892892). Examples -------- >>> lst = ['2', 2]; func = lambda x: x*2 >>> map_inplace(func, lst); lst ['22', 4] >>> lifespan = {'cat': 15, 'dog': 12}; die_early = lambda x: x/2 >>> map_inplace(die_early, lifespan); lifespan {'cat': 7.5, 'dog': 6.0} """ if type(__iterable) == dict: for key, value in __iterable.items(): __iterable[key] = function(value) else: for i in range(len(__iterable)): __iterable[i] = function(__iterable[i])
[docs]def non_unique(array, axis=None): """Finds indices of non-unique elements in a 1D or 2D ndarray. Parameters ---------- array : ndarray Array in which the non-unique elements are searched. axis : {None, 0, 1}, optional The axis to operate on. If None, `array` will be flattened. If an integer, the subarrays indexed by the given axis will be flattened and treated as the elements of a 1-D array with the dimension of the given axis. Object arrays or structured arrays that contain objects are not supported if the `axis` kwarg is used. The default is None. Returns ------- nonunique_values : list Unique (individual, row or column) entries. nonunique_indices : list Each element of the list corresponds to non-unique elements, whose indices are given in a 1D numpy array. Examples -------- In a 1D array, the repeated values and their indices are found by >>> val, idx = non_unique(np.array([1, -1, 0, -1, 2, 5, 0, -1])) >>> val [-1, 0] >>> idx [array([1, 3, 7]), array([2, 6])] In the matrix below, we can see that rows 0 and 2 are identical, as well as rows 1 and 4. >>> val, idx = non_unique(np.array([[1, 3], [2, 4], [1, 3], [-1, 0], [2, 4]]), axis=0) >>> val [array([1, 3]), array([2, 4])] >>> idx [array([0, 2]), array([1, 4])] By transposing the matrix above, the same holds for the columns. >>> val, idx = non_unique(np.array([[1, 2, 1, -1, 2], [3, 4, 3, 0, 4]]), axis=1) >>> val [array([1, 3]), array([2, 4])] >>> idx [array([0, 2]), array([1, 4])] If the dimensions along which to find the duplicates are not given, the input is flattened and the indexing happens in C-order (row-wise). >>> val, idx = non_unique(np.array([[1, 2, 1, -1, 2], [3, 4, 3, 0, 4]])) >>> val [1, 2, 3, 4] >>> idx [array([0, 2]), array([1, 4]), array([5, 7]), array([6, 9])] """ if axis not in {None, 0, 1}: raise Exception('Parameter `axis` must be one of the following: None, 0, 1.') # Indices of the repeated elements _, idx, counts = np.unique(array, axis=axis, return_counts=True, return_inverse=True) distinct_indices = np.unique(idx) repeated_indices = distinct_indices[counts > 1] # Help in consistent indexing if axis is None: array = array.flatten() elif axis == 0: pass elif axis == 1: array = array.T # Find the repeated values and their positions at the same time nonunique_indices = [] nonunique_values = [] for i in repeated_indices: nonunique_indices.append(np.nonzero(idx == i)[0]) nonunique_values.append(array[idx == i][0]) return nonunique_values, nonunique_indices
[docs]def parse_kwargs(kwargs, defaults): """Compares keyword arguments with defaults. Allows processing keyword arguments supplied to a function by the user by comparing them with an admissible set of options defined by the developer. There are three cases: 1. The keyword given by the user is present in the set the developer provides. Then the value belonging to the keyword overrides the default value. 2. The keyword given by the user is `not` present in the set the developer provides. In this case, the unrecognized keyword, along with its value, is saved separately. 3. The keyword existing in the set the developer provides is not given by the user. Then the default value is used. Parameters ---------- kwargs : dict Keyword arguments (parameter-value pairs) passed to a function. defaults : dict Default parameter-value pairs. Returns ------- parsed : dict Dictionary with the same keys as :code:`defaults`, the parsed parameter-value pairs. unknown : dict Dictionary containing the parameter-value pairs not present in :code:`defaults`. Notes ----- The default values, given in the input dictionary :code:`defaults`, are never overwritten. Examples -------- >>> default_options = {'opt1': 1, 'opt2': 'string', 'opt3': [-1, 0]} >>> user_options = {'opt3': [2, 3, -1], 'opt2': 'string', 'opt4': -2.14} >>> parsed_options, unknown_options = parse_kwargs(user_options, default_options) >>> parsed_options {'opt1': 1, 'opt2': 'string', 'opt3': [2, 3, -1]} >>> unknown_options {'opt4': -2.14} """ parsed = defaults.copy() unknown = {} for param, value in kwargs.items(): if param in defaults: parsed[param] = value else: unknown[param] = value return parsed, unknown
[docs]def compress(filename, level=9): """Creates a zip archive from a single file. Parameters ---------- filename : str Name of the file to be compressed. level : int, optional Level of compression. Integers 0 through 9 are accepted. The default is 9. Returns ------- None. See Also -------- :class:`zipfile.ZipFile` """ name = os.path.splitext(filename)[0] output_file = name + '.zip' zip_options = dict(compression=zipfile.ZIP_DEFLATED, compresslevel=level) with zipfile.ZipFile(output_file, mode='w', **zip_options) as compressed: compressed.write(filename)
[docs]def decompress(filename, path=None): """Decompresses the contents of a zip archive into the current directory. Parameters ---------- filename : str Name of the zip archive. path : str, optional Directory to extract to. The default is the directory the function is called from. See Also -------- :class:`zipfile.ZipFile` """ with zipfile.ZipFile(filename, mode='r') as compressed: compressed.extractall(path=path)
[docs]def decompress_inmemory(filename): """Decompresses the contents of a zip archive into a dictionary. Parameters ---------- filename : str Name of the zip archive. Returns ------- data : dict The keys of the dictionary are the compressed file names (without extension), the corresponding values are their contents. See Also -------- :class:`zipfile.ZipFile` """ data = {} with zipfile.ZipFile(filename, mode='r') as compressed: for file in compressed.namelist(): with compressed.open(file, mode='r') as thisfile: name = os.path.splitext(thisfile.name)[0] data[name] = compressed.read(thisfile.name).decode() return data
[docs]def neighborhood(center, radius, norm, method='ball', bounds=None): r"""Neighboring points to a grid point. Given a point in a subspace of :math:`\mathbb{Z}^n`, the neighboring points are determined based on the specified rules. .. todo:: Currently, the neighbors are deterministically but not systematically ordered. Apart from testing purposes, this does not seem to be a big issue. Still, a logical ordering is desirable. E.g. ordering in increasing coordinate values, first in the first dimension and lastly in the last dimension. Parameters ---------- center : tuple of int Point :math:`x_0` around which the neighborhood is searched. It must be an n-tuple of integers, where :math:`n` is the spatial dimension. radius : int, positive Radius of the ball or sphere in which the neighbors are searched. If the radius is 1, the immediate neighbors are returned. norm : {1, inf} Type of the vector norm :math:`\| x - x_0 \|`, where :math:`x` is a point whose distance is searched from the center :math:`x_0`. ========= ============ ========================================================= Type Name Definition ========= ============ ========================================================= 1 1-norm :math:`\| x \|_1 = \sum_{i=1}^n |x_i|` numpy.inf maximum norm :math:`\| x \|_{\infty} = \max\{ |x_1|, \ldots, |x_n| \}` ========= ============ ========================================================= where inf means numpy's np.inf object. method : {'ball', 'sphere'}, optional Specifies the criterion of how to decide whether a point :math:`x` is in the neighborhood of :math:`x_0`. The default is 'ball'. For 'ball': .. math:: \| x - x_0 \| \leq r For 'sphere': .. math:: \| x - x_0 \| = r where :math:`r` is the radius passed as the :code:`radius` parameter and the type of the norm is taken based on the :code:`norm` input parameter. bounds : list of tuple, optional Restricts the neighbors within a box. The dimensions of the n-dimensional box are given as a list of 2-tuples: [(x_1_min, x_1_max), ... , (x_n_min, x_n_max)]. The default value is an unbounded box in all dimensions. Use np.inf to indicate unbounded values. Returns ------- neighbors : tuple of ndarray Tuple of length n, each entry being a 1D numpy array: the integer indices of the points that are in the neighborhood of :code:`center`. Notes ----- 1. The `von Neumann neighborhood <https://mathworld.wolfram.com/vonNeumannNeighborhood.html>`_ with range :math:`r` is a special case when :code:`radius=r`, :code:`norm=1` and :code:`method='ball'`. 2. The `Moore neighborhood <https://mathworld.wolfram.com/MooreNeighborhood.html>`_ with range :math:`r` is a special case when :code:`radius=r`, :code:`norm=np.inf` and :code:`method='ball'`. Examples -------- Find the Moore neighborhood with range 2 the point (1) on the half-line [0, inf). >>> neighborhood((1,), 2, np.inf, bounds=[(0, np.inf)]) (array([0, 1, 2, 3]),) Find the von Neumann neighborhood with range 2 around the point (2, 2), restricted on the domain [0, 4] x [0, 3]. >>> neighborhood((2, 2), 2, 1, bounds=[(0, 4), (0, 3)]) (array([2, 1, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3]), array([0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3])) Find the Moore neighborhood with range 1 around the point (0, -4) such that the neighbors lie on the half-plane [0, 2] x (-inf, -4]. >>> neighborhood((0, -4), 1, np.inf, bounds=[(0, 2), (-np.inf, -4)]) (array([0, 1, 0, 1]), array([-5, -5, -4, -4])) Find the sphere of radius 2, measured in the 1-norm, around the point (-1, 0, 3), within the half-space {(x,y,z) in Z^3 | y>=0}. >>> neighborhood((-1, 0, 3), 2, 1, 'sphere', [(-np.inf, np.inf), (0, np.inf), (-np.inf, np.inf)]) # doctest: +NORMALIZE_WHITESPACE (array([-3, -2, -2, -1, -1, 0, 0, 1, -2, -1, -1, 0, -1]), array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2]), array([3, 2, 4, 1, 5, 2, 4, 3, 3, 2, 4, 3, 3])) """ dim = len(center) if not bounds: bounds = [(-np.Inf, np.Inf) for i in range(dim)] # Every neighborhood is within this bounding box bounding_box = [np.linspace(center[i] - radius, center[i] + radius, 2 * radius + 1, dtype=int) for i in range(dim)] # Coordinates of the points in that bounding box X = np.meshgrid(*bounding_box) candidates = [X[i].flatten() for i in range(dim)] # Distance of the candidate points from the center candidates_matrix = np.column_stack(candidates) center_matrix = np.reshape(center, (1, dim)) n_candidates = (2 * radius + 1) ** dim center_matrix = np.repeat(center_matrix, n_candidates, axis=0) distance = lambda x: np.linalg.norm(x, norm, axis=1) distances = distance(candidates_matrix - center_matrix) # Select those points that satisfy the required distance measure within_distance = distances <= radius if method == 'ball' else distances == radius # Consider only neighbors that fall within the specified bounds within_bounds = [] for i in range(dim): lower_bound = bounds[i][0] upper_bound = bounds[i][1] within_bounds.append((candidates_matrix[:, i] >= lower_bound) & (candidates_matrix[:, i] <= upper_bound)) within_bounds = reduce(np.logical_and, within_bounds) # Neighbors are given component-wise neighbors = tuple(candidates[i][within_distance & within_bounds] for i in range(dim)) return neighbors
if __name__ == "__main__": import doctest doctest.testmod(verbose=True)