Source code for grains.utils

# -*- coding: utf-8 -*-

"""
This module provides *general* utility functions used by the **grains** package. The specific
helper functions reside in the proper module. For example, a function that works on a general
list goes here, but a computational geometry algorithm goes to the **geometry** module. The
functions in the **utils** module can be interesting for other projects too, partly because of
their general scope, and partly because of the few dependencies.

Functions
---------
.. autosummary::
    :nosignatures:
    :toctree: functions/

    duplicates
    toggle
    index_list
    flatten_list
    argsorted
    map_inplace
    non_unique
    parse_kwargs
    compress
    decompress
    decompress_inmemory
    neighborhood

"""

import os
import zipfile
from functools import reduce

import numpy as np


[docs]def duplicates(sequence):
    """Set of duplicate elements in a sequence.

    Parameters
    ----------
    sequence : sequence types (list, tuple, string, etc.)
        Sequence possibly containing repeating elements.

    Returns
    -------
    set
        Set of unique values.

    Notes
    -----
    Copied from https://stackoverflow.com/a/9836685/4892892

    Examples
    --------
    Note that the order of the elements in the resulting set does not matter.

    >>> a = [1, 2, 3, 2, 1, 5, 6, 5, 5, 5]  # list
    >>> duplicates(a)
    {1, 2, 5}
    >>> a = (1, 1, 0, -1, -1, 0)  # tuple
    >>> duplicates(a)
    {0, 1, -1}
    >>> a = 'abbcdkc'  # string
    >>> duplicates(a)
    {'c', 'b'}
    """
    seen = set()
    seen_add = seen.add
    # adds all elements it doesn't know yet to seen and all other to seen_twice
    seen_twice = set(x for x in sequence if x in seen or seen_add(x))
    # turn the set into a list (as requested)
    return seen_twice


[docs]def toggle(lst):
    """Return True for False values and False for True values in a list.

    Parameters
    ----------
    lst : list
        An arbitrary list, possibly containing other lists.

    Returns
    -------
    list
        Element-wise logical not operator applied on the input list.

    Notes
    -----
    Solution taken from https://stackoverflow.com/a/51122372/4892892.

    Examples
    --------
    >>> toggle([True, False])
    [False, True]
    >>> toggle(['h', 0, 2.3, -2, 5, []])
    [False, True, False, False, False, True]

    """
    return list(map(lambda item: not item, lst))


[docs]def index_list(lst, indices):
    """Index a list by another list.

    Parameters
    ----------
    lst : list
        List to be indexed.
    indices : list
        Indices of the original list that will form the new list.

    Returns
    -------
    list
        Members of `lst`, selected by `indices`.

    Examples
    --------
    >>> index_list(['c', ['nested', 'list'], 13], [1, 2])
    [['nested', 'list'], 13]

    """
    return [lst[idx] for idx in indices]


[docs]def flatten_list(nested_list):
    """Merge a list of lists to a single list.

    Parameters
    ----------
    nested_list : list
        List containing other lists.

    Returns
    -------
    list
        Flattened list.

    Notes
    -----
    - Only a single level (i.e. list of lists) is handled, see the second example.
    - Several methods, such as list comprehension, monoid and loops, are proposed in
      https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists.
      Here, the list comprehension approach is used.

    Examples
    --------
    >>> nested_list = [['some'], ['items']]
    >>> flatten_list(nested_list)
    ['some', 'items']
    >>> multiply_nested_list = [[['item'], 'within', 'item']]
    >>> flatten_list(multiply_nested_list)
    [['item'], 'within', 'item']

    """
    return [item for sublist in nested_list for item in sublist]


[docs]def argsorted(sequence, reverse=False):
    """Return the indices that would sort a list or a tuple.

    Implementation is taken from https://stackoverflow.com/a/6979121/4892892.

    Parameters
    ----------
    sequence : list, tuple
        Input sequence in which the sorted indices to be found.
    reverse : bool
        If set to True, then the elements are sorted as if each comparison was reversed.

    Returns
    -------
    list
        List of indices that would sort the input list/tuple.

    See Also
    --------
    :func:`sorted`
    :func:`numpy.argsort`

    Examples
    --------
    >>> argsorted([2, 1.1, 1.1])
    [1, 2, 0]
    >>> argsorted([2, 1.1, 1.1], reverse=True)
    [0, 1, 2]
    >>> argsorted(())
    []

    """
    return sorted(range(len(sequence)), key=lambda k: sequence[k], reverse=reverse)


[docs]def map_inplace(function, __iterable):
    """Apply a function to each member of an iterable in-place.

    Parameters
    ----------
    function : function object
        Function to be applied to the entries of the iterable.
    __iterable : iterable
        Iterable.

    Notes
    -----
    Comprehensions or functional tools work on iterators, thereby not modifying the original
    container (https://stackoverflow.com/a/4148523/4892892). For in-place modification, the
    conventional for loop approach is used (https://stackoverflow.com/a/4148525/4892892).

    Examples
    --------
    >>> lst = ['2', 2]; func = lambda x: x*2
    >>> map_inplace(func, lst); lst
    ['22', 4]
    >>> lifespan = {'cat': 15, 'dog': 12}; die_early = lambda x: x/2
    >>> map_inplace(die_early, lifespan); lifespan
    {'cat': 7.5, 'dog': 6.0}

    """
    if type(__iterable) == dict:
        for key, value in __iterable.items():
            __iterable[key] = function(value)
    else:
        for i in range(len(__iterable)):
            __iterable[i] = function(__iterable[i])


[docs]def non_unique(array, axis=None):
    """Finds indices of non-unique elements in a 1D or 2D ndarray.

    Parameters
    ----------
    array : ndarray
        Array in which the non-unique elements are searched.
    axis : {None, 0, 1}, optional
        The axis to operate on. If None, `array` will be flattened. If an integer, the subarrays
        indexed by the given axis will be flattened and treated as the elements of a 1-D array with
        the dimension of the given axis. Object arrays or structured arrays that contain objects are
        not supported if the `axis` kwarg is used. The default is None.

    Returns
    -------
    nonunique_values : list
        Unique (individual, row or column) entries.
    nonunique_indices : list
        Each element of the list corresponds to non-unique elements, whose indices are given in
        a 1D numpy array.

    Examples
    --------
    In a 1D array, the repeated values and their indices are found by

    >>> val, idx = non_unique(np.array([1, -1, 0, -1, 2, 5, 0, -1]))
    >>> val
    [-1, 0]
    >>> idx
    [array([1, 3, 7]), array([2, 6])]

    In the matrix below, we can see that rows 0 and 2 are identical, as well as rows 1 and 4.

    >>> val, idx = non_unique(np.array([[1, 3], [2, 4], [1, 3], [-1, 0], [2, 4]]), axis=0)
    >>> val
    [array([1, 3]), array([2, 4])]
    >>> idx
    [array([0, 2]), array([1, 4])]

    By transposing the matrix above, the same holds for the columns.

    >>> val, idx = non_unique(np.array([[1, 2, 1, -1, 2], [3, 4, 3, 0, 4]]), axis=1)
    >>> val
    [array([1, 3]), array([2, 4])]
    >>> idx
    [array([0, 2]), array([1, 4])]

    If the dimensions along which to find the duplicates are not given, the input is flattened
    and the indexing happens in C-order (row-wise).

    >>> val, idx = non_unique(np.array([[1, 2, 1, -1, 2], [3, 4, 3, 0, 4]]))
    >>> val
    [1, 2, 3, 4]
    >>> idx
    [array([0, 2]), array([1, 4]), array([5, 7]), array([6, 9])]

    """
    if axis not in {None, 0, 1}:
        raise Exception('Parameter `axis` must be one of the following: None, 0, 1.')
    # Indices of the repeated elements
    _, idx, counts = np.unique(array, axis=axis, return_counts=True, return_inverse=True)
    distinct_indices = np.unique(idx)
    repeated_indices = distinct_indices[counts > 1]
    # Help in consistent indexing
    if axis is None:
        array = array.flatten()
    elif axis == 0:
        pass
    elif axis == 1:
        array = array.T
    # Find the repeated values and their positions at the same time
    nonunique_indices = []
    nonunique_values = []
    for i in repeated_indices:
        nonunique_indices.append(np.nonzero(idx == i)[0])
        nonunique_values.append(array[idx == i][0])
    return nonunique_values, nonunique_indices


[docs]def parse_kwargs(kwargs, defaults):
    """Compares keyword arguments with defaults.

    Allows processing keyword arguments supplied to a function by the user by comparing them with
    an admissible set of options defined by the developer. There are three cases:

        1. The keyword given by the user is present in the set the developer provides. Then the
        value belonging to the keyword overrides the default value.

        2. The keyword given by the user is `not` present in the set the developer provides. In
        this case, the unrecognized keyword, along with its value, is saved separately.

        3. The keyword existing in the set the developer provides is not given by the user. Then
        the default value is used.

    Parameters
    ----------
    kwargs : dict
        Keyword arguments (parameter-value pairs) passed to a function.
    defaults : dict
        Default parameter-value pairs.

    Returns
    -------
    parsed : dict
        Dictionary with the same keys as :code:`defaults`, the parsed parameter-value pairs.
    unknown : dict
        Dictionary containing the parameter-value pairs not present in :code:`defaults`.

    Notes
    -----
    The default values, given in the input dictionary :code:`defaults`, are never overwritten.

    Examples
    --------
    >>> default_options = {'opt1': 1, 'opt2': 'string', 'opt3': [-1, 0]}
    >>> user_options = {'opt3': [2, 3, -1], 'opt2': 'string',  'opt4': -2.14}
    >>> parsed_options, unknown_options = parse_kwargs(user_options, default_options)
    >>> parsed_options
    {'opt1': 1, 'opt2': 'string', 'opt3': [2, 3, -1]}
    >>> unknown_options
    {'opt4': -2.14}

    """
    parsed = defaults.copy()
    unknown = {}
    for param, value in kwargs.items():
        if param in defaults:
            parsed[param] = value
        else:
            unknown[param] = value
    return parsed, unknown


[docs]def compress(filename, level=9):
    """Creates a zip archive from a single file.

    Parameters
    ----------
    filename : str
        Name of the file to be compressed.
    level : int, optional
        Level of compression. Integers 0 through 9 are accepted. The default is 9.

    Returns
    -------
    None.

    See Also
    --------
    :class:`zipfile.ZipFile`

    """
    name = os.path.splitext(filename)[0]
    output_file = name + '.zip'
    zip_options = dict(compression=zipfile.ZIP_DEFLATED, compresslevel=level)
    with zipfile.ZipFile(output_file, mode='w', **zip_options) as compressed:
        compressed.write(filename)


[docs]def decompress(filename, path=None):
    """Decompresses the contents of a zip archive into the current directory.

    Parameters
    ----------
    filename : str
        Name of the zip archive.

    path : str, optional
        Directory to extract to. The default is the directory the function is called from.

    See Also
    --------
    :class:`zipfile.ZipFile`

    """
    with zipfile.ZipFile(filename, mode='r') as compressed:
        compressed.extractall(path=path)


[docs]def decompress_inmemory(filename):
    """Decompresses the contents of a zip archive into a dictionary.

    Parameters
    ----------
    filename : str
        Name of the zip archive.

    Returns
    -------
    data : dict
        The keys of the dictionary are the compressed file names (without extension),
        the corresponding values are their contents.

    See Also
    --------
    :class:`zipfile.ZipFile`

    """
    data = {}
    with zipfile.ZipFile(filename, mode='r') as compressed:
        for file in compressed.namelist():
            with compressed.open(file, mode='r') as thisfile:
                name = os.path.splitext(thisfile.name)[0]
                data[name] = compressed.read(thisfile.name).decode()
    return data


[docs]def neighborhood(center, radius, norm, method='ball', bounds=None):
    r"""Neighboring points to a grid point.

    Given a point in a subspace of :math:`\mathbb{Z}^n`, the neighboring points are determined
    based on the specified rules.

    .. todo::
        Currently, the neighbors are deterministically but not systematically ordered.
        Apart from testing purposes, this does not seem to be a big issue.
        Still, a logical ordering is desirable. E.g. ordering in increasing coordinate values,
        first in the first dimension and lastly in the last dimension.

    Parameters
    ----------
    center : tuple of int
        Point :math:`x_0` around which the neighborhood is searched. It must be an n-tuple of
        integers, where :math:`n` is the spatial dimension.
    radius : int, positive
        Radius of the ball or sphere in which the neighbors are searched.
        If the radius is 1, the immediate neighbors are returned.
    norm : {1, inf}
        Type of the vector norm :math:`\| x - x_0 \|`, where :math:`x` is a point whose
        distance is searched from the center :math:`x_0`.

        =========   ============   =========================================================
        Type        Name           Definition
        =========   ============   =========================================================
        1           1-norm         :math:`\| x \|_1 = \sum_{i=1}^n |x_i|`
        numpy.inf   maximum norm   :math:`\| x \|_{\infty} = \max\{ |x_1|, \ldots, |x_n| \}`
        =========   ============   =========================================================

        where inf means numpy's np.inf object.
    method : {'ball', 'sphere'}, optional
        Specifies the criterion of how to decide whether a point :math:`x` is in the neighborhood
        of :math:`x_0`. The default is 'ball'.

        For 'ball':

        .. math::
            \| x - x_0 \| \leq r

        For 'sphere':

        .. math::
            \| x - x_0 \| = r

        where :math:`r` is the radius passed as the :code:`radius` parameter and the type of the
        norm is taken based on the :code:`norm` input parameter.
    bounds : list of tuple, optional
        Restricts the neighbors within a box. The dimensions of the n-dimensional box are given
        as a list of 2-tuples: [(x_1_min, x_1_max), ... , (x_n_min, x_n_max)]. The default value
        is an unbounded box in all dimensions. Use np.inf to indicate unbounded values.

    Returns
    -------
    neighbors : tuple of ndarray
        Tuple of length n, each entry being a 1D numpy array: the integer indices of the points
        that are in the neighborhood of :code:`center`.


    Notes
    -----
    1. The `von Neumann neighborhood <https://mathworld.wolfram.com/vonNeumannNeighborhood.html>`_
    with range :math:`r` is a special case when :code:`radius=r`, :code:`norm=1` and
    :code:`method='ball'`.

    2. The `Moore neighborhood <https://mathworld.wolfram.com/MooreNeighborhood.html>`_
    with range :math:`r` is a special case when :code:`radius=r`, :code:`norm=np.inf` and
    :code:`method='ball'`.

    Examples
    --------
    Find the Moore neighborhood with range 2 the point (1) on the half-line [0, inf).

    >>> neighborhood((1,), 2, np.inf, bounds=[(0, np.inf)])
    (array([0, 1, 2, 3]),)

    Find the von Neumann neighborhood with range 2 around the point (2, 2), restricted on
    the domain [0, 4] x [0, 3].

    >>> neighborhood((2, 2), 2, 1, bounds=[(0, 4), (0, 3)])
    (array([2, 1, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3]), array([0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3]))

    Find the Moore neighborhood with range 1 around the point (0, -4) such that the neighbors
    lie on the half-plane [0, 2] x (-inf, -4].

    >>> neighborhood((0, -4), 1, np.inf, bounds=[(0, 2), (-np.inf, -4)])
    (array([0, 1, 0, 1]), array([-5, -5, -4, -4]))

    Find the sphere of radius 2, measured in the 1-norm, around the point (-1, 0, 3), within the
    half-space {(x,y,z) in Z^3 | y>=0}.

    >>> neighborhood((-1, 0, 3), 2, 1, 'sphere', [(-np.inf, np.inf), (0, np.inf), (-np.inf, np.inf)])  # doctest: +NORMALIZE_WHITESPACE
    (array([-3, -2, -2, -1, -1,  0,  0,  1, -2, -1, -1,  0, -1]),
     array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2]),
     array([3, 2, 4, 1, 5, 2, 4, 3, 3, 2, 4, 3, 3]))

    """
    dim = len(center)
    if not bounds:
        bounds = [(-np.Inf, np.Inf) for i in range(dim)]

    # Every neighborhood is within this bounding box
    bounding_box = [np.linspace(center[i] - radius, center[i] + radius, 2 * radius + 1, dtype=int)
                    for i in range(dim)]
    # Coordinates of the points in that bounding box
    X = np.meshgrid(*bounding_box)
    candidates = [X[i].flatten() for i in range(dim)]

    # Distance of the candidate points from the center
    candidates_matrix = np.column_stack(candidates)
    center_matrix = np.reshape(center, (1, dim))
    n_candidates = (2 * radius + 1) ** dim
    center_matrix = np.repeat(center_matrix, n_candidates, axis=0)
    distance = lambda x: np.linalg.norm(x, norm, axis=1)
    distances = distance(candidates_matrix - center_matrix)

    # Select those points that satisfy the required distance measure
    within_distance = distances <= radius if method == 'ball' else distances == radius

    # Consider only neighbors that fall within the specified bounds
    within_bounds = []
    for i in range(dim):
        lower_bound = bounds[i][0]
        upper_bound = bounds[i][1]
        within_bounds.append((candidates_matrix[:, i] >= lower_bound) &
                             (candidates_matrix[:, i] <= upper_bound))
    within_bounds = reduce(np.logical_and, within_bounds)

    # Neighbors are given component-wise
    neighbors = tuple(candidates[i][within_distance & within_bounds] for i in range(dim))
    return neighbors


if __name__ == "__main__":
    import doctest
    doctest.testmod(verbose=True)