"""
===================================
The :mod:`array_split.split` Module
===================================
.. currentmodule:: array_split.split
Defines array splitting functions and classes.
Classes and Functions
=====================
.. autosummary::
:toctree: generated/
shape_factors - Compute *largest* factors of a given integer.
calculate_num_slices_per_axis - Computes per-axis divisions for a multi-dimensional shape.
calculate_tile_shape_for_max_bytes - Calculate a tile shape subject to max bytes restriction.
ShapeSplitter - Splits a given shape into slices.
shape_split - Splits a specified shape and returns :obj:`numpy.ndarray` of :obj:`slice` elements.
array_split - Equivalent to :func:`numpy.array_split`.
Attributes
==========
.. autodata:: ARRAY_BOUNDS
.. autodata:: NO_BOUNDS
"""
from __future__ import absolute_import
from .license import license as _license, copyright as _copyright
import array_split as _array_split
import array_split.logging as _logging
import numpy as _np
__author__ = "Shane J. Latham"
__license__ = _license()
__copyright__ = _copyright()
__version__ = _array_split.__version__
def is_scalar(obj):
    """
    Tests whether :samp:`{obj}` can be treated as a number.

    The check is duck-typed: any object exposing an :samp:`__int__`
    or a :samp:`__long__` conversion method qualifies.

    :rtype: :obj:`bool`
    :return: :samp:`True` if :samp:`{obj}` has an integer conversion method.
    """
    return any(
        hasattr(obj, conversion) for conversion in ("__int__", "__long__")
    )
def is_sequence(obj):
    """
    Tests whether :samp:`{obj}` looks like a sequence
    (e.g. a :obj:`list` or :obj:`tuple`, etc).

    The check is duck-typed: any object exposing :samp:`__len__`
    or :samp:`__getitem__` qualifies, which includes :obj:`str`
    and :obj:`dict` objects.

    :rtype: :obj:`bool`
    :return: :samp:`True` if :samp:`{obj}` has a length or supports indexing.
    """
    for protocol_method in ("__len__", "__getitem__"):
        if hasattr(obj, protocol_method):
            return True
    return False
def is_indices(indices_or_sections):
    """
    Returns :samp:`True` if argument :samp:`{indices_or_sections}` is
    a sequence (e.g. a :obj:`list` or :obj:`tuple`, etc), i.e. it
    specifies split *indices* rather than a number of *sections*.

    This is a direct delegation to :func:`is_sequence`.

    :rtype: :obj:`bool`
    :return: The result of :samp:`is_sequence({indices_or_sections})`.
    """
    return is_sequence(indices_or_sections)
def pad_with_object(sequence, new_length, obj=None):
    """
    End-pads :samp:`{sequence}` with :samp:`{obj}` elements so that
    the result has :samp:`{new_length}` elements.

    :type sequence: sequence
    :param sequence: The sequence to be padded.
    :type new_length: :obj:`int`
    :param new_length: The required length of the returned sequence.
    :type obj: :obj:`object`
    :param obj: The element used for padding.
    :rtype: sequence
    :return: A new padded :obj:`list` when :samp:`{sequence}` is too short,
       otherwise the :samp:`{sequence}` argument itself (unchanged).
    :raises ValueError: If :samp:`len({sequence}) > {new_length}`.
    """
    num_missing = new_length - len(sequence)
    if num_missing < 0:
        raise ValueError(
            "Got len(sequence)=%s which exceeds new_length=%s"
            %
            (len(sequence), new_length)
        )
    if num_missing > 0:
        sequence = list(sequence) + [obj, ] * num_missing
    return sequence
def pad_with_none(sequence, new_length):
    """
    Returns :samp:`sequence` :obj:`list` end-padded with :samp:`None`
    elements so that the length of the returned list equals :samp:`{new_length}`.

    Convenience wrapper which calls :func:`pad_with_object`
    with :samp:`obj=None`.

    :type sequence: sequence
    :param sequence: The sequence to be padded.
    :type new_length: :obj:`int`
    :param new_length: The required length of the returned sequence.
    :return: The result of :samp:`pad_with_object({sequence}, {new_length}, obj=None)`.
    """
    return pad_with_object(sequence, new_length, obj=None)
def shape_factors(n, dim=2):
    """
    Factors the integer :samp:`{n}` into :samp:`{dim}` integers whose
    product is :samp:`{n}`, i.e. returns an array :samp:`f` such
    that :samp:`(len(f) == {dim}) and (numpy.prod(f) == {n})`.
    The returned factors are as *square* (*cubic*, etc) as possible
    and are sorted in ascending order.
    For example::

       >>> shape_factors(24, 1)
       array([24])
       >>> shape_factors(24, 2)
       array([4, 6])
       >>> shape_factors(24, 3)
       array([2, 3, 4])
       >>> shape_factors(24, 4)
       array([2, 2, 2, 3])
       >>> shape_factors(24, 5)
       array([1, 2, 2, 2, 3])
       >>> shape_factors(24, 6)
       array([1, 1, 2, 2, 2, 3])

    :type n: :obj:`int`
    :param n: Integer which is factored into :samp:`{dim}` factors.
    :type dim: :obj:`int`
    :param dim: Number of factors.
    :rtype: :obj:`numpy.ndarray`
    :return: A :samp:`({dim},)` shaped array of integers which are factors of :samp:`{n}`.
    """
    if dim <= 1:
        return _np.array([n, ])

    # Search downwards from (just above) the dim-th root of n; the "+ 1"
    # guards against the floating point root being rounded down.
    search_start = int(n ** (1.0 / float(dim))) + 1
    # 1 divides every integer, so the search always finds a divisor.
    divisor = next(
        candidate
        for candidate in range(search_start, 0, -1)
        if (n % candidate) == 0
    )
    # Factor the quotient into the remaining (dim - 1) factors.
    remaining = list(shape_factors(n // divisor, dim=dim - 1))
    return _np.array(sorted([divisor, ] + remaining))
def calculate_tile_shape_for_max_bytes(
    array_shape,
    array_itemsize,
    max_tile_bytes,
    max_tile_shape=None,
    sub_tile_shape=None,
    halo=None
):
    """
    Returns a tile shape :samp:`tile_shape`
    such that :samp:`numpy.prod(tile_shape)*numpy.sum({array_itemsize}) <= {max_tile_bytes}`.
    Also, if :samp:`{max_tile_shape} is not None`
    then :samp:`numpy.all(tile_shape <= {max_tile_shape}) is True` and
    if :samp:`{sub_tile_shape} is not None`
    the :samp:`numpy.all((tile_shape % {sub_tile_shape}) == 0) is True`.

    :type array_shape: sequence of :obj:`int`
    :param array_shape: Shape of the array which is to be split into tiles.
    :type array_itemsize: :obj:`int`
    :param array_itemsize: The number of bytes per element of the array to be tiled.
    :type max_tile_bytes: :obj:`int`
    :param max_tile_bytes: The maximum number of bytes for the returned :samp:`tile_shape`.
    :type max_tile_shape: sequence of :obj:`int`
    :param max_tile_shape: Per axis maximum shapes for the returned :samp:`tile_shape`.
    :type sub_tile_shape: sequence of :obj:`int`
    :param sub_tile_shape: The returned :samp:`tile_shape` will be an integer multiple
       of this sub-tile shape.
    :type halo: :obj:`int`, sequence of :obj:`int`, or :samp:`(len({array_shape}), 2)`
       shaped :obj:`numpy.ndarray`
    :param halo: How tiles are extended in each axis direction with *halo*
       elements. See :ref:`the-halo-parameter-examples` for meaning of :samp:`{halo}` values.
    :rtype: :obj:`numpy.ndarray`
    :return: A 1D array of shape :samp:`(len(array_shape),)` indicating a *tile shape*
       which will (approximately) uniformly divide the given :samp:`{array_shape}` into
       tiles (sub-arrays).
    :raises ValueError: If the number of :samp:`{halo}` rows differs
       from :samp:`len({array_shape})`, or if any :samp:`{array_shape}` element is
       smaller than the corresponding :samp:`{sub_tile_shape}` element.

    Examples::

        >>> from array_split.split import calculate_tile_shape_for_max_bytes
        >>> calculate_tile_shape_for_max_bytes(
        ... array_shape=[512,],
        ... array_itemsize=1,
        ... max_tile_bytes=512
        ... )
        array([512])
        >>> calculate_tile_shape_for_max_bytes(
        ... array_shape=[512,],
        ... array_itemsize=2, # Doubling the itemsize halves the tile size.
        ... max_tile_bytes=512
        ... )
        array([256])
        >>> calculate_tile_shape_for_max_bytes(
        ... array_shape=[512,],
        ... array_itemsize=1,
        ... max_tile_bytes=512-1 # tile shape will now be halved
        ... )
        array([256])
    """
    logger = _logging.getLogger(__name__ + ".calculate_tile_shape_for_max_bytes")
    logger.debug("calculate_tile_shape_for_max_bytes: enter:")
    logger.debug("array_shape=%s", array_shape)
    logger.debug("array_itemsize=%s", array_itemsize)
    logger.debug("max_tile_bytes=%s", max_tile_bytes)
    logger.debug("max_tile_shape=%s", max_tile_shape)
    logger.debug("sub_tile_shape=%s", sub_tile_shape)
    logger.debug("halo=%s", halo)

    # Normalise all arguments to int64 arrays (itemsize to a single total).
    array_shape = _np.array(array_shape, dtype="int64")
    array_itemsize = _np.sum(array_itemsize, dtype="int64")

    if max_tile_shape is None:
        max_tile_shape = _np.array(array_shape, copy=True)
    # A tile can never be bigger than the array itself.
    max_tile_shape = \
        _np.array(_np.minimum(max_tile_shape, array_shape), copy=True, dtype=array_shape.dtype)

    if sub_tile_shape is None:
        sub_tile_shape = _np.ones((len(array_shape),), dtype="int64")
    sub_tile_shape = _np.array(sub_tile_shape, dtype="int64")

    # Normalise halo to a (len(array_shape), 2) array of (-ve, +ve) sizes.
    if halo is None:
        halo = _np.zeros((len(array_shape), 2), dtype="int64")
    elif is_scalar(halo):
        halo = _np.zeros((len(array_shape), 2), dtype="int64") + halo
    else:
        halo = _np.array(halo, copy=True)
        if len(halo.shape) == 1:
            # One value per axis: same halo on both sides of the axis.
            halo = _np.array([halo, halo]).T.copy()

    if halo.shape[0] != len(array_shape):
        raise ValueError(
            "Got halo.shape=%s, expecting halo.shape=(%s, 2)"
            %
            (halo.shape, array_shape.shape[0])
        )

    if _np.any(array_shape < sub_tile_shape):
        raise ValueError(
            "Got array_shape=%s element less than corresponding sub_tile_shape=%s element."
            %
            (
                array_shape,
                sub_tile_shape
            )
        )

    logger.debug("max_tile_shape=%s", max_tile_shape)
    logger.debug("sub_tile_shape=%s", sub_tile_shape)
    logger.debug("halo=%s", halo)

    # Number of sub-tiles (per axis) needed to cover the whole array.
    array_sub_tile_split_shape = ((array_shape - 1) // sub_tile_shape) + 1
    # Number of sub-tiles (per axis) in a tile, initially as large as possible.
    tile_sub_tile_split_shape = array_shape // sub_tile_shape
    if len(tile_sub_tile_split_shape) <= 1:
        # 1D case: the byte limit (less the halo) directly gives the tile length.
        tile_sub_tile_split_shape[0] = \
            int(_np.floor(
                (
                    (max_tile_bytes / float(array_itemsize))
                    -
                    _np.sum(halo)
                )
                /
                float(sub_tile_shape[0])
            ))
    tile_sub_tile_split_shape = \
        _np.minimum(
            tile_sub_tile_split_shape,
            max_tile_shape // sub_tile_shape
        )
    logger.debug("tile_sub_tile_split_shape=%s", tile_sub_tile_split_shape)

    # Shrink the tile one axis at a time (starting at axis 0) until the tile,
    # including halo, fits in max_tile_bytes. The loop bound is the number of
    # axes; bounding it by len(<1D array>.shape) (always 1) would stop after
    # axis 0 and could leave a multi-axis tile bigger than max_tile_bytes.
    current_axis = 0
    while (
        (current_axis < len(tile_sub_tile_split_shape))
        and
        (
            (
                _np.prod(tile_sub_tile_split_shape * sub_tile_shape + _np.sum(halo, axis=1))
                *
                array_itemsize
            )
            >
            max_tile_bytes
        )
    ):
        if current_axis < (len(tile_sub_tile_split_shape) - 1):
            # Number of single-sub-tile-thick slabs (along current_axis)
            # which fit within the byte limit.
            tile_sub_tile_split_shape[current_axis] = 1
            tile_sub_tile_split_shape[current_axis] = \
                (
                    max_tile_bytes
                    //
                    (
                        _np.prod(
                            tile_sub_tile_split_shape *
                            sub_tile_shape +
                            _np.sum(
                                halo,
                                axis=1))
                        *
                        array_itemsize
                    )
                )
            if tile_sub_tile_split_shape[current_axis] <= 0:
                # Not even one slab fits: keep a single sub-tile on this
                # axis and carry on shrinking along the next axis.
                tile_sub_tile_split_shape[current_axis] = 1
                current_axis += 1
        else:
            # Last axis: all preceding axes have been reduced to one sub-tile
            # (plus halo), solve directly for the number of sub-tiles.
            sub_tile_shape_h = sub_tile_shape.copy()
            sub_tile_shape_h[0:current_axis] += _np.sum(halo[0:current_axis, :], axis=1)
            tile_sub_tile_split_shape[current_axis] = \
                int(_np.floor(
                    (
                        (max_tile_bytes / float(array_itemsize))
                        -
                        _np.sum(halo[current_axis]) * _np.prod(sub_tile_shape_h[0:current_axis])
                    )
                    /
                    float(_np.prod(sub_tile_shape_h))
                ))
    logger.debug("tile_sub_tile_split_shape=%s", tile_sub_tile_split_shape)

    tile_shape = _np.minimum(array_shape, tile_sub_tile_split_shape * sub_tile_shape)
    logger.debug("pre cannonicalise tile_shape=%s", tile_shape)

    # Canonicalise: keep the number of tiles per axis, but make the tiles
    # (approximately) equal sized multiples of sub_tile_shape.
    tile_split_shape = ((array_shape - 1) // tile_shape) + 1
    logger.debug("tile_split_shape=%s", tile_split_shape)

    tile_shape = (((array_sub_tile_split_shape - 1) // tile_split_shape) + 1) * sub_tile_shape
    logger.debug("post cannonicalise tile_shape=%s", tile_shape)

    return tile_shape
def calculate_num_slices_per_axis(num_slices_per_axis, num_slices, max_slices_per_axis=None):
    """
    Returns a :obj:`numpy.ndarray` (:samp:`return_array` say) where non-positive elements of
    the :samp:`{num_slices_per_axis}` sequence have been replaced with
    positive integer values such that :samp:`numpy.prod(return_array) == num_slices`
    and::

       numpy.all(
           return_array[numpy.where(num_slices_per_axis <= 0)]
           <=
           max_slices_per_axis[numpy.where(num_slices_per_axis <= 0)]
       ) is True

    :type num_slices_per_axis: sequence of :obj:`int`
    :param num_slices_per_axis: Constraint for per-axis sub-divisions.
       Non-positive elements indicate values to be replaced in the
       returned array. Positive values are identical to the corresponding
       element in the returned array.
    :type num_slices: integer
    :param num_slices: Indicates the number of slices (rectangular sub-arrays)
       formed by performing sub-divisions per axis. The returned array :samp:`return_array`
       has elements assigned such that :samp:`numpy.prod(return_array) == {num_slices}`.
    :type max_slices_per_axis: sequence of :obj:`int` (or :samp:`None`)
    :param max_slices_per_axis: Constraint specifying maximum number of per-axis sub-divisions.
       If :samp:`None` defaults to :samp:`numpy.array([numpy.inf,]*len({num_slices_per_axis}))`.
    :rtype: :obj:`numpy.ndarray`
    :return: An array :samp:`return_array`
       such that :samp:`numpy.prod(return_array) == num_slices`.
    :raises ValueError: If :samp:`{max_slices_per_axis}` has a non-positive element, or
       if :samp:`{num_slices}` is not a multiple of the product of the already
       assigned (positive) per-axis values.

    Examples::

        >>> from array_split.split import calculate_num_slices_per_axis
        >>>
        >>> calculate_num_slices_per_axis([0, 0, 0], 16)
        array([4, 2, 2])
        >>> calculate_num_slices_per_axis([1, 0, 0], 16)
        array([1, 4, 4])
        >>> calculate_num_slices_per_axis([1, 0, 0], 16, [2, 2, 16])
        array([1, 2, 8])
        >>> calculate_num_slices_per_axis([0, 0], 16, [16, 2])
        array([8, 2])
    """
    logger = _logging.getLogger(__name__)

    ret_array = _np.array(num_slices_per_axis, copy=True)
    if max_slices_per_axis is None:
        max_slices_per_axis = _np.array([_np.inf, ] * len(num_slices_per_axis))

    max_slices_per_axis = _np.array(max_slices_per_axis)

    if _np.any(max_slices_per_axis <= 0):
        raise ValueError("Got non-positive value in max_slices_per_axis=%s" % max_slices_per_axis)

    # Each pass assigns at least one of the unassigned (non-positive) axes.
    while _np.any(ret_array <= 0):
        # Product of the axes already assigned (1 when none are assigned).
        prd = _np.prod(ret_array[_np.where(ret_array > 0)])
        if (num_slices < prd) or ((num_slices % prd) > 0):
            raise ValueError(
                (
                    "Unable to construct grid of num_slices=%s elements from "
                    +
                    "num_slices_per_axis=%s (with max_slices_per_axis=%s)"
                )
                %
                (num_slices, num_slices_per_axis, max_slices_per_axis)
            )
        ridx = _np.where(ret_array <= 0)
        # Candidate factors for the unassigned axes, largest first.
        f = shape_factors(num_slices // prd, ridx[0].shape[0])[::-1]
        if _np.all(f < max_slices_per_axis[ridx]):
            ret_array[ridx] = f
        else:
            # Clamp only the first axis whose candidate factor reaches its
            # maximum and leave the other axes unassigned, so that the next
            # pass re-factors what remains over them. Assigning the candidate
            # factors of the preceding axes here as well can leave the product
            # short of num_slices, e.g. ([0, 0], 16, [16, 2]) giving [4, 2].
            for i in range(ridx[0].shape[0]):
                axis = ridx[0][i]
                if f[i] >= max_slices_per_axis[axis]:
                    ret_array[axis] = max_slices_per_axis[axis]
                    prd = _np.prod(ret_array[_np.where(ret_array > 0)])
                    # Reduce until the assigned product divides num_slices.
                    while (num_slices % prd) > 0:
                        ret_array[axis] -= 1
                        prd = _np.prod(ret_array[_np.where(ret_array > 0)])
                    break
        logger.debug("ridx=%s, f=%s, ret_array=%s", ridx, f, ret_array)
    return ret_array
# Reusable reStructuredText docstring fragments. Each constant below holds
# the field-list documentation for one parameter (or group of parameters).
# NOTE(review): presumably these are spliced into function/method docstrings
# further down the file -- confirm against the code which uses them.

# Documentation fragment for the ``array_shape`` parameter.
_array_shape_param_doc =\
"""
:type array_shape: sequence of :obj:`int`
:param array_shape: The shape to be *split*.
"""
# Documentation fragment for the ``array_start`` parameter.
_array_start_param_doc =\
"""
:type array_start: :samp:`None` or sequence of :obj:`int`
:param array_start: The start index. Defaults to :samp:`[0,]*len(array_shape)`.
The array indexing extents are assumed to range from :samp:`{array_start}`
to :samp:`{array_start} + {array_shape}`.
See :ref:`the-array_start-parameter-examples` examples.
"""
# Documentation fragment for the ``array_itemsize`` parameter.
_array_itemsize_param_doc =\
"""
:type array_itemsize: int or sequence of :obj:`int`
:param array_itemsize: Number of bytes per array element.
Only relevant when :samp:`{max_tile_bytes}` is specified.
See :ref:`splitting-by-maximum-bytes-per-tile-examples` examples.
"""
# Documentation fragment for the ``tile_bounds_policy`` parameter.
_array_tile_bounds_policy_param_doc =\
"""
:type tile_bounds_policy: :obj:`str`
:param tile_bounds_policy: Specifies whether tiles can extend beyond the array boundaries.
Only relevant for halo values greater than one. If :samp:`{tile_bounds_policy}`
is :data:`ARRAY_BOUNDS`
then the calculated tiles will not extend beyond the array
extents :samp:`{array_start}` and :samp:`{array_start} + {array_shape}`.
If :samp:`{tile_bounds_policy}` is :data:`NO_BOUNDS`
then the returned tiles will extend beyond
the :samp:`{array_start}` and :samp:`{array_start} + {array_shape}` extend
for positive :samp:`{halo}` values. See :ref:`the-halo-parameter-examples` examples.
"""
# Documentation template for the split-defining parameters. It is a
# %-format template with four ``%s`` slots: two after the ``axis``
# documentation and two at the very end.
# NOTE(review): presumably the slots receive the other parameter
# fragments defined in this section -- confirm at the point of use.
_ShapeSplitter__init__params_doc =\
"""
:type indices_or_sections: :samp:`None`, :obj:`int` or sequence of :obj:`int`
:param indices_or_sections: If an integer, indicates the number of
elements in the calculated *split* array. If a sequence, indicates
the indicies (per axis) at which the splits occur.
See :ref:`splitting-by-number-of-tiles-examples` examples.
:type axis: :samp:`None`, :obj:`int` or sequence of :obj:`int`
:param axis: If an integer, indicates the axis which is to be split.
If a sequence integers, indicates the number of slices per axis,
i.e. if :samp:`{axis} = [3, 5]` then axis :samp:`0` is cut into
3 slices and axis :samp:`1` is cut into 5 slices for a total
of 15 (:samp:`3*5`) rectangular slices in the returned :samp:`(3, 5)`
shaped split.
See :ref:`splitting-by-number-of-tiles-examples` examples
and :ref:`splitting-by-per-axis-split-indices-examples` examples.
%s%s
:type tile_shape: :samp:`None` or sequence of :obj:`int`
:param tile_shape: When not :samp:`None`, specifies explicit shape for tiles.
Should be same length as :samp:`{array_shape}`.
See :ref:`splitting-by-tile-shape-examples` examples.
:type max_tile_bytes: :samp:`None` or :obj:`int`
:param max_tile_bytes: The maximum number of bytes for calculated :samp:`tile_shape`.
See :ref:`splitting-by-maximum-bytes-per-tile-examples` examples.
:type max_tile_shape: :samp:`None` or sequence of :obj:`int`
:param max_tile_shape: Per axis maximum shapes for the calculated :samp:`tile_shape`.
Only relevant when :samp:`{max_tile_bytes}` is specified. Should be same length
as :samp:`{array_shape}`.
See :ref:`splitting-by-maximum-bytes-per-tile-examples` examples.
:type sub_tile_shape: :samp:`None` or sequence of :obj:`int`
:param sub_tile_shape: When not :samp:`None`, the calculated :samp:`tile_shape` will
be an even multiple of this sub-tile shape. Only relevant when :samp:`{max_tile_bytes}`
is specified. Should be same length as :samp:`{array_shape}`.
See :ref:`splitting-by-maximum-bytes-per-tile-examples` examples.%s%s
"""
# Documentation fragment for the ``halo`` parameter.
_halo_param_doc =\
"""
:type halo: :samp:`None`, :obj:`int`, sequence of :obj:`int`, or :samp:`(len({array_shape}), 2)`
shaped :obj:`numpy.ndarray`
:param halo: How tiles are extended per axis in -ve and +ve directions with *halo*
elements. See :ref:`the-halo-parameter-examples` examples.
"""
# NOTE: these are plain string constants. Decorating a module-level function
# with ``@property`` (as was done previously) binds the name to a
# ``property`` object rather than to the string, so the documented ``str``
# values ("array_bounds"/"no_bounds") were never accepted as policy values.

#: Indicates that tiles are always within the array bounds,
#: resulting in tiles which have truncated halos.
#: See :ref:`the-halo-parameter-examples` examples.
ARRAY_BOUNDS = "array_bounds"

# Private alias of :data:`ARRAY_BOUNDS`, retained for backward compatibility.
__ARRAY_BOUNDS = ARRAY_BOUNDS

#: Indicates that tiles may have halos which extend beyond the array bounds.
#: See :ref:`the-halo-parameter-examples` examples.
NO_BOUNDS = "no_bounds"

# Private alias of :data:`NO_BOUNDS`, retained for backward compatibility.
__NO_BOUNDS = NO_BOUNDS
[docs]class ShapeSplitter(object):
"""
Implements array shape splitting. There are three main (top-level) methods:
:meth:`__init__`
Initialisation of parameters which define the split.
:meth:`set_split_extents`
Calculates the per-axis indices for the cuts. Sets
the :attr:`split_shape`, :attr:`split_begs`
and :attr:`split_ends` attributes.
:meth:`calculate_split`
Calls :meth:`set_split_extents` followed
by :meth:`calculate_split_from_extents` to
return the :obj:`numpy.ndarray` of :obj:`tuple` elements (slices).
"""
#: Class attribute for :obj:`logging.Logger` logging.
logger = _logging.getLogger(__name__ + ".ShapeSplitter")
#: Class attribute indicating list of valid values for :attr:`tile_bounds_policy`.
#: See :data:`ARRAY_BOUNDS` and :data:`NO_BOUNDS`.
valid_tile_bounds_policies = [ARRAY_BOUNDS, NO_BOUNDS]
def __init__(
    self,
    array_shape,
    indices_or_sections=None,
    axis=None,
    array_start=None,
    array_itemsize=1,
    tile_shape=None,
    max_tile_bytes=None,
    max_tile_shape=None,
    sub_tile_shape=None,
    halo=None,
    tile_bounds_policy=ARRAY_BOUNDS
):
    # Initialises the parameters which define the split. No split is
    # calculated here and (apart from normalising the arguments) no
    # validation is performed; the split attributes (split_shape,
    # split_begs and split_ends) are initialised to None.
    self.array_shape = _np.array(array_shape)
    if array_start is None:
        # Default start index is zero for every axis.
        array_start = _np.zeros_like(self.array_shape)
    self.array_start = array_start
    self.array_itemsize = array_itemsize

    # indices_or_sections is either a sequence (the split *indices*) or
    # something else (None, or the number of *sections*).
    indices_per_axis = None
    if is_indices(indices_or_sections):
        num_subarrays = None
        indices_per_axis = indices_or_sections
        if (
            ((axis is None) or is_scalar(axis))
            and
            (not _np.any([is_sequence(_e) for _e in indices_or_sections]))
        ):
            # A flat sequence of indices applies to a single axis
            # (axis 0 when no axis was given).
            if axis is None:
                axis = 0
            # Make indices_per_axis a list of lists, so that
            # element 0 is a list of indices for axis 0
            indices_per_axis = [None, ] * len(array_shape)
            indices_per_axis[axis] = indices_or_sections
    else:
        indices_per_axis = None
        num_subarrays = indices_or_sections

    self.indices_per_axis = indices_per_axis
    self.split_size = num_subarrays

    # NOTE: axis may have been rebound to 0 by the indices handling above.
    split_num_slices_per_axis = None
    if (self.split_size is not None) or (axis is not None):
        if axis is None:
            axis = 0
        if is_sequence(axis):
            # A sequence axis gives the number of slices per axis,
            # missing trailing axes get a single slice.
            split_num_slices_per_axis = pad_with_object(axis, len(self.array_shape), 1)
        elif self.split_size is not None:
            # Scalar axis: all the sections are cut along that one axis.
            split_num_slices_per_axis = pad_with_object([], len(self.array_shape), 1)
            split_num_slices_per_axis[axis] = self.split_size

    self.split_num_slices_per_axis = split_num_slices_per_axis

    self.tile_shape = tile_shape
    self.max_tile_bytes = max_tile_bytes
    self.max_tile_shape = max_tile_shape
    self.sub_tile_shape = sub_tile_shape

    # Normalise halo towards a (len(array_shape), 2) shaped array.
    if halo is None:
        # No halo.
        halo = _np.zeros((len(self.array_shape), 2), dtype="int64")
    elif is_scalar(halo):
        # Same halo size on both sides of every axis.
        halo = _np.zeros((len(self.array_shape), 2), dtype="int64") + halo
    elif (len(array_shape) == 1) and (_np.array(halo).shape == (2,)):
        # 1D array and two halo values: a single row of halo sizes
        # for the one and only axis.
        halo = _np.array([halo, ], copy=True)
    elif len(_np.array(halo).shape) == 1:
        # One value per axis: same halo size on both sides of the axis.
        halo = _np.array([halo, halo]).T.copy()
    else:
        halo = _np.array(halo, copy=True)
    self.halo = halo

    if tile_bounds_policy is None:
        tile_bounds_policy = ARRAY_BOUNDS

    self.tile_bounds_policy = tile_bounds_policy

    # Tile extent limits initially coincide with the array extents.
    self.tile_beg_min = self.array_start
    self.tile_end_max = self.array_start + self.array_shape

    # Results of the split extent calculation, not yet calculated.
    self.split_shape = None
    self.split_begs = None
    self.split_ends = None
@property
def array_shape(self):
    """
    The shape of the array which is to be split. A sequence of :obj:`int` indicating the
    per-axis sizes which are to be split.
    """
    return self.__array_shape

@array_shape.setter
def array_shape(self, array_shape):
    # Stored as given; no validation or conversion is performed here.
    self.__array_shape = array_shape
@property
def array_start(self):
    """
    The start index. A sequence of :obj:`int` indicating the start of indexing for
    the tile slices. Defaults to :samp:`numpy.zeros_like({self}.array_shape)`.
    """
    return self.__array_start

@array_start.setter
def array_start(self, array_start):
    # Stored as given; no validation or conversion is performed here.
    self.__array_start = array_start
@property
def array_itemsize(self):
    """
    The number of bytes per array element, see :attr:`max_tile_bytes`.
    """
    return self.__array_itemsize

@array_itemsize.setter
def array_itemsize(self, array_itemsize):
    # Stored as given; no validation or conversion is performed here.
    self.__array_itemsize = array_itemsize
@property
def indices_per_axis(self):
    """
    The per-axis indices indicating the cuts for the split.
    A :obj:`list` of 1D :obj:`numpy.ndarray` objects such
    that :samp:`{self}.indices_per_axis[i]` indicates the
    cut positions for axis :samp:`i`. Is :samp:`None` when
    the split is not defined by indices.
    """
    return self.__indices_per_axis

@indices_per_axis.setter
def indices_per_axis(self, indices_per_axis):
    # Stored as given; no validation or conversion is performed here.
    self.__indices_per_axis = indices_per_axis
@property
def split_size(self):
    """
    An :obj:`int` indicating the number of tiles in the calculated split.
    """
    return self.__split_size

@split_size.setter
def split_size(self, split_size):
    # Stored as given; no validation or conversion is performed here.
    self.__split_size = split_size
@property
def split_num_slices_per_axis(self):
    """
    Number of slices per axis.
    A 1D :obj:`numpy.ndarray` of :obj:`int` indicating the number of slices (sections)
    per axis, so that :samp:`{self}.split_num_slices_per_axis[i]` is an integer
    indicating the number of sections along axis :samp:`i` in the calculated split.
    """
    return self.__split_num_slices_per_axis

@split_num_slices_per_axis.setter
def split_num_slices_per_axis(self, split_num_slices_per_axis):
    # Stored as given; no validation or conversion is performed here.
    self.__split_num_slices_per_axis = split_num_slices_per_axis
@property
def tile_shape(self):
    """
    The shape of all tiles in the calculated split.
    A 1D :samp:`numpy.ndarray` of :obj:`int` indicating the per-axis
    number of elements for tiles in the calculated split.
    """
    return self.__tile_shape

@tile_shape.setter
def tile_shape(self, tile_shape):
    # Stored as given; no validation or conversion is performed here.
    self.__tile_shape = tile_shape
@property
def max_tile_bytes(self):
    """
    The maximum number of bytes for any tile (including :attr:`halo`) in the returned split.
    An :obj:`int` which constrains the tile shape such that any tile
    from the computed split is no bigger than :samp:`{max_tile_bytes}`.
    """
    return self.__max_tile_bytes

@max_tile_bytes.setter
def max_tile_bytes(self, max_tile_bytes):
    # Stored as given; no validation or conversion is performed here.
    self.__max_tile_bytes = max_tile_bytes
@property
def max_tile_shape(self):
    """
    Per-axis maximum sizes for calculated tiles.
    A 1D :samp:`numpy.ndarray` of :obj:`int` indicating the per-axis
    maximum number of elements for tiles in the calculated split.
    """
    return self.__max_tile_shape

@max_tile_shape.setter
def max_tile_shape(self, max_tile_shape):
    # Stored as given; no validation or conversion is performed here.
    self.__max_tile_shape = max_tile_shape
@property
def sub_tile_shape(self):
    """
    Calculated tile shape will be an integer multiple of this sub-tile shape.
    i.e. :samp:`(self.tile_shape[i] % self.sub_tile_shape[i]) == 0`,
    for :samp:`i in range(0, len(self.tile_shape))`.
    A 1D :samp:`numpy.ndarray` of :obj:`int` indicating sub-tile shape.
    """
    return self.__sub_tile_shape

@sub_tile_shape.setter
def sub_tile_shape(self, sub_tile_shape):
    # Stored as given; no validation or conversion is performed here.
    self.__sub_tile_shape = sub_tile_shape
@property
def halo(self):
    """
    Per-axis -ve and +ve halo sizes for extending tiles to overlap with neighbouring tiles.
    A :samp:`(N, 2)` shaped array indicating the per-axis halo sizes,
    where :samp:`{self}.halo[i, 0]` is the number of elements by which tiles
    are extended in the -ve direction of axis :samp:`i`
    and :samp:`{self}.halo[i, 1]` is the number of elements by which tiles
    are extended in the +ve direction of axis :samp:`i`.
    """
    return self.__halo

@halo.setter
def halo(self, halo):
    # Stored as given; shape consistency is checked by check_halo.
    self.__halo = halo
@property
def tile_bounds_policy(self):
    """
    A string indicating whether tile halo extents can extend beyond the array domain.
    Valid values are indicated by :attr:`valid_tile_bounds_policies`.
    """
    return self.__tile_bounds_policy

@tile_bounds_policy.setter
def tile_bounds_policy(self, tile_bounds_policy):
    # Stored as given; validity is checked by check_tile_bounds_policy.
    self.__tile_bounds_policy = tile_bounds_policy
@property
def tile_beg_min(self):
    """
    The per-axis minimum index for :attr:`slice.start`. The per-axis lower bound for
    tile start indices. A 1D :obj:`numpy.ndarray`.
    """
    return self.__tile_beg_min

@tile_beg_min.setter
def tile_beg_min(self, tile_beg_min):
    # Stored as given; no validation or conversion is performed here.
    self.__tile_beg_min = tile_beg_min
@property
def tile_end_max(self):
    """
    The per-axis maximum index for :attr:`slice.stop`. The per-axis upper bound for
    tile stop indices. A 1D :obj:`numpy.ndarray`.
    """
    return self.__tile_end_max

@tile_end_max.setter
def tile_end_max(self, tile_end_max):
    # Stored as given; no validation or conversion is performed here.
    self.__tile_end_max = tile_end_max
@property
def split_shape(self):
    """
    The shape of the calculated split array. Indicates the per-axis number
    of sections in the calculated split. A 1D :obj:`numpy.ndarray`.
    """
    return self.__split_shape

@split_shape.setter
def split_shape(self, split_shape):
    # Stored as given; no validation or conversion is performed here.
    self.__split_shape = split_shape
@property
def split_begs(self):
    """
    The list of per-axis start indices for :obj:`slice` objects.
    A :obj:`list` of 1D :obj:`numpy.ndarray` objects indicating
    the :attr:`slice.start` index for tiles.
    """
    return self.__split_begs

@split_begs.setter
def split_begs(self, split_begs):
    # Stored as given; no validation or conversion is performed here.
    self.__split_begs = split_begs
@property
def split_ends(self):
    """
    The list of per-axis stop indices for :obj:`slice` objects.
    A :obj:`list` of 1D :obj:`numpy.ndarray` objects indicating
    the :attr:`slice.stop` index for tiles.
    """
    return self.__split_ends

@split_ends.setter
def split_ends(self, split_ends):
    # Stored as given; no validation or conversion is performed here.
    self.__split_ends = split_ends
[docs] def check_halo(self):
"""
Raises :obj:`ValueError` if there is an inconsistency
between shapes of :attr:`array_shape` and :attr:`halo`.
"""
if (
(len(self.halo.shape) != 2)
or
(self.halo.shape[0] != len(self.array_shape))
or
(self.halo.shape[1] != 2)
):
raise ValueError(
"Got halo.shape=%s, expecting halo.shape=(%s, 2)"
%
(self.halo.shape, self.array_shape.shape[0])
)
[docs] def check_tile_bounds_policy(self):
"""
Raises :obj:`ValueError` if :attr:`tile_bounds_policy`
is not in :samp:`[{self}.ARRAY_BOUNDS, {self}.NO_BOUNDS]`.
"""
if not (self.tile_bounds_policy in self.valid_tile_bounds_policies):
raise ValueError(
"Got self.tile_bounds_policy=%s, which is not in %s."
%
(self.tile_bounds_policy, self.valid_tile_bounds_policies)
)
[docs] def check_consistent_parameter_dimensions(self):
"""
Ensure that all parameter dimensions are consistent with
the :attr:`array_shape` dimension.
:raises ValueError: For inconsistent parameter dimensions.
"""
if self.indices_per_axis is not None:
if len(self.indices_per_axis) > len(self.array_shape):
raise ValueError(
"Got len(self.indices_per_axis)=%s > len(self.array_shape)=%s, should be equal."
%
(len(self.indices_per_axis), len(self.array_shape))
)
if self.split_num_slices_per_axis is not None:
if len(self.split_num_slices_per_axis) > len(self.array_shape):
raise ValueError(
(
"Got len(self.split_num_slices_per_axis)=%s > len(self.array_shape)=%s,"
+
" should be equal."
)
%
(len(self.split_num_slices_per_axis), len(self.array_shape))
)
if self.tile_shape is not None:
if len(self.tile_shape) != len(self.array_shape):
raise ValueError(
"Got len(self.tile_shape)=%s > len(self.array_shape)=%s, should be equal."
%
(len(self.tile_shape), len(self.array_shape))
)
if self.sub_tile_shape is not None:
if len(self.sub_tile_shape) != len(self.array_shape):
raise ValueError(
"Got len(self.sub_tile_shape)=%s > len(self.array_shape)=%s, should be equal."
%
(len(self.sub_tile_shape), len(self.array_shape))
)
if self.max_tile_shape is not None:
if len(self.max_tile_shape) != len(self.array_shape):
raise ValueError(
"Got len(self.max_tile_shape)=%s > len(self.array_shape)=%s, should be equal."
%
(len(self.max_tile_shape), len(self.array_shape))
)
if self.array_start is not None:
if len(self.array_start) != len(self.array_shape):
raise ValueError(
"Got len(self.array_start)=%s > len(self.array_shape)=%s, should be equal."
%
(len(self.array_start), len(self.array_shape))
)
[docs] def check_consistent_parameter_grouping(self):
"""
Ensures this object does not have conflicting groups of parameters.
:raises ValueError: For conflicting or absent parameters.
"""
parameter_groups = {}
if self.indices_per_axis is not None:
parameter_groups["indices_per_axis"] = \
{"self.indices_per_axis": self.indices_per_axis}
if (self.split_size is not None) or (self.split_num_slices_per_axis is not None):
parameter_groups["split_size"] = \
{
"self.split_size": self.split_size,
"self.split_num_slices_per_axis": self.split_num_slices_per_axis,
}
if self.tile_shape is not None:
parameter_groups["tile_shape"] = \
{"self.tile_shape": self.tile_shape}
if self.max_tile_bytes is not None:
parameter_groups["max_tile_bytes"] = \
{"self.max_tile_bytes": self.max_tile_bytes}
if self.max_tile_shape is not None:
if "max_tile_bytes" not in parameter_groups.keys():
parameter_groups["max_tile_bytes"] = {}
parameter_groups["max_tile_bytes"]["self.max_tile_shape"] = self.max_tile_shape
if self.sub_tile_shape is not None:
if "max_tile_bytes" not in parameter_groups.keys():
parameter_groups["max_tile_bytes"] = {}
parameter_groups["max_tile_bytes"]["self.sub_tile_shape"] = self.sub_tile_shape
if (len(parameter_groups.keys()) > 1):
group_keys = sorted(parameter_groups.keys())
raise ValueError(
"Got conflicting parameter groups specified, "
+
"should only specify one group to define the split:\n"
+
(
"\n".join(
[
(
("Group %18s: " % ("'%s'" % group_key))
+
str(parameter_groups[group_key])
)
for group_key in group_keys
]
)
)
)
if (len(parameter_groups.keys()) <= 0):
raise ValueError(
"No split parameters specified, need parameters from one of the groups: "
+
"'indices_per_axis', 'split_size', 'tile_shape' or 'max_tile_bytes'"
)
def check_split_parameters(self):
    """
    Ensures this object has a state consistent with evaluating a split.

    Runs, in order, :meth:`check_halo`, :meth:`check_tile_bounds_policy`,
    :meth:`check_consistent_parameter_dimensions`
    and :meth:`check_consistent_parameter_grouping`. The first
    failing check raises, so later checks are not run.

    :raises ValueError: For conflicting or absent parameters.
    """
    self.check_halo()
    self.check_tile_bounds_policy()
    self.check_consistent_parameter_dimensions()
    self.check_consistent_parameter_grouping()
def update_tile_extent_bounds(self):
    """
    Recalculates the tile extent limits :attr:`tile_beg_min`
    and :attr:`tile_end_max` from :attr:`tile_bounds_policy`.

    For the :data:`ARRAY_BOUNDS` policy the limits are the array
    extents, for the :data:`NO_BOUNDS` policy the limits are the array
    extents enlarged by the :attr:`halo`. The limits are left untouched
    for any other policy value.
    """
    policy = self.tile_bounds_policy
    if policy == ARRAY_BOUNDS:
        # Tiles (and their halos) are confined to the array extents.
        self.tile_beg_min = self.array_start
        self.tile_end_max = self.array_start + self.array_shape
    elif policy == NO_BOUNDS:
        # Halos may reach beyond the array extents.
        self.tile_beg_min = self.array_start - self.halo[:, 0]
        self.tile_end_max = self.array_start + self.array_shape + self.halo[:, 1]
def set_split_extents_by_indices_per_axis(self):
    """
    Calculates the split shape :attr:`split_shape` and the
    split extents (:attr:`split_begs` and :attr:`split_ends`)
    from the cut positions in :attr:`indices_per_axis`.

    The :attr:`indices_per_axis` attribute is first end-padded
    with :samp:`None` to the length of :attr:`array_shape`. An axis
    with :samp:`None` (or empty) indices is not cut, i.e. it gets a
    single section spanning the full width of the axis.

    :raises ValueError: If :attr:`indices_per_axis` is :samp:`None`.
    """
    if self.indices_per_axis is None:
        raise ValueError("Got None for self.indices_per_axis")

    self.logger.debug("self.array_shape=%s", self.array_shape)
    self.logger.debug("self.indices_per_axis=%s", self.indices_per_axis)

    num_axes = len(self.array_shape)
    self.indices_per_axis = pad_with_none(self.indices_per_axis, num_axes)

    # Define the start and stop indices (extents) for each axis slice
    self.split_shape = _np.ones(num_axes, dtype="int64")
    self.split_begs = [[] for _ in range(num_axes)]
    self.split_ends = [[] for _ in range(num_axes)]
    for axis, cuts in enumerate(self.indices_per_axis):
        if (cuts is None) or (len(cuts) <= 0):
            # start and stop is the full width of the axis
            self.split_begs[axis] = [0, ]
            self.split_ends[axis] = [self.array_shape[axis], ]
            continue

        num_sections = len(cuts) + 1
        self.split_shape[axis] = num_sections
        # The first section starts at 0, the others start at the cuts.
        begs = _np.zeros((num_sections,), dtype="int64")
        begs[1:] = cuts
        self.split_begs[axis] = begs
        # Each section ends where the next one starts, the last
        # section ends at the end of the axis.
        ends = _np.zeros((len(begs),), dtype="int64")
        ends[0:-1] = begs[1:]
        ends[-1] = self.array_shape[axis]
        self.split_ends[axis] = ends

    self.logger.debug("self.indices_per_axis=%s", self.indices_per_axis)
def calculate_split_from_extents(self):
    """
    Returns split calculated using extents obtained
    from :attr:`split_begs` and :attr:`split_ends`.

    Every tile extent is offset by :attr:`array_start`, grown
    by :attr:`halo` and then clipped to the
    range :attr:`tile_beg_min` to :attr:`tile_end_max`.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       where each element is a :obj:`tuple` of :obj:`slice` objects.
    """
    self.logger.debug("self.split_shape=%s", self.split_shape)
    self.logger.debug("self.split_begs=%s", self.split_begs)
    self.logger.debug("self.split_ends=%s", self.split_ends)

    num_axes = len(self.split_shape)
    grid_shape = tuple(self.split_shape)

    # One row of per-axis tile indices for every tile of the grid, rows are
    # in C (row-major) order so that the final reshape puts each tile at
    # its own grid position.
    # NOTE: numpy.product was removed in NumPy 2.0, numpy.prod is the
    # supported spelling.
    tile_indices = \
        _np.array(
            _np.unravel_index(
                _np.arange(0, _np.prod(self.split_shape)),
                grid_shape
            )
        ).T

    tiles = [
        tuple(
            slice(
                # Grow the start by the low-side halo, clip to the lower bound.
                max(
                    self.split_begs[d][idx[d]]
                    + self.array_start[d] - self.halo[d, 0],
                    self.tile_beg_min[d]
                ),
                # Grow the stop by the high-side halo, clip to the upper bound.
                min(
                    self.split_ends[d][idx[d]]
                    + self.array_start[d] + self.halo[d, 1],
                    self.tile_end_max[d]
                )
            )
            for d in range(num_axes)
        )
        for idx in tile_indices
    ]

    # One object field per axis, field names are "0", "1", ...
    ret = \
        _np.array(
            tiles,
            dtype=[("%d" % d, "object") for d in range(num_axes)]
        ).reshape(grid_shape)

    return ret
def calculate_split_by_indices_per_axis(self):
    """
    Computes the split defined by the per-axis cut indices
    in :attr:`indices_per_axis`.

    The extents are set first
    (:meth:`set_split_extents_by_indices_per_axis`) and the split
    is then generated from those extents.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       where each element is a :obj:`tuple` of :obj:`slice` objects.
    """
    self.set_split_extents_by_indices_per_axis()
    split = self.calculate_split_from_extents()
    return split
def calculate_axis_split_extents(self, num_sections, size):
    """
    Divides :samp:`range(0, {size})` into (approximately) equal sized
    intervals. Returns :samp:`(begs, ends)` where :samp:`slice(begs[i], ends[i])`
    define the intervals for :samp:`i in range(0, {num_sections})`.

    When :samp:`{size}` is not a multiple of :samp:`{num_sections}` the
    leading intervals are one element longer than the trailing ones.
    When :samp:`{size} < {num_sections}` the trailing intervals are empty.

    :type num_sections: :obj:`int`
    :param num_sections: Divide :samp:`range(0, {size})` into this
       many (approximately) equal sized intervals.
    :type size: :obj:`int`
    :param size: Range for the subdivision.
    :rtype: :obj:`tuple`
    :return: Two element tuple :samp:`(begs, ends)`
       such that :samp:`slice(begs[i], ends[i])` define the
       intervals for :samp:`i in range(0, {num_sections})`.
    """
    section_size = size // num_sections
    if section_size >= 1:
        begs = _np.arange(0, section_size * num_sections, section_size)
        rem = size - section_size * num_sections
        if rem > 0:
            # The first rem intervals are one element longer, hence
            # interval j starts min(j, rem) elements later. A single
            # vectorised add replaces rem separate slice increments.
            begs += _np.minimum(_np.arange(len(begs)), rem)
        ends = _np.zeros_like(begs)
        ends[0:-1] = begs[1:]
        ends[-1] = size
    else:
        # Fewer elements than intervals: the first size intervals hold one
        # element each, the remaining intervals are empty (beg == end == size).
        begs = _np.arange(0, num_sections)
        begs[size:] = size
        ends = begs.copy()
        ends[0:-1] = begs[1:]

    return begs, ends
def set_split_extents_by_split_size(self):
    """
    Sets split shape :attr:`split_shape` and
    split extents (:attr:`split_begs` and :attr:`split_ends`)
    from values in :attr:`split_size` and :attr:`split_num_slices_per_axis`.

    When :attr:`split_size` is :samp:`None` it is set to the product of
    the elements of :attr:`split_num_slices_per_axis`.

    :raises ValueError: If :attr:`split_size` is :samp:`None` and
       any element of :attr:`split_num_slices_per_axis` is :samp:`None`
       or is not greater than zero.
    """
    if self.split_size is None:
        if (
            _np.all([s is not None for s in self.split_num_slices_per_axis])
            and
            _np.all([s > 0 for s in self.split_num_slices_per_axis])
        ):
            # numpy.product was removed in NumPy 2.0, numpy.prod is the
            # supported spelling.
            self.split_size = _np.prod(self.split_num_slices_per_axis)
        else:
            raise ValueError(
                (
                    "Got invalid self.split_num_slices_per_axis=%s, all elements "
                    +
                    "need to be integers greater than zero when self.split_size is None."
                )
                %
                # Wrapped in a one element tuple so that a tuple valued
                # attribute is formatted as a single value instead of
                # being unpacked as multiple format arguments.
                (self.split_num_slices_per_axis,)
            )

    self.logger.debug(
        "Pre cannonicalise: self.split_num_slices_per_axis=%s",
        self.split_num_slices_per_axis)
    # Fill in the per-axis slice counts from the requested total number
    # of tiles and the array shape.
    self.split_num_slices_per_axis = \
        calculate_num_slices_per_axis(
            self.split_num_slices_per_axis,
            self.split_size,
            self.array_shape
        )
    self.logger.debug(
        "Post cannonicalise: self.split_num_slices_per_axis=%s",
        self.split_num_slices_per_axis)

    # Define the start and stop indices (extents) for each axis slice
    self.split_shape = self.split_num_slices_per_axis.copy()
    self.split_begs = [[], ] * len(self.array_shape)
    self.split_ends = [[], ] * len(self.array_shape)
    for i in range(len(self.array_shape)):
        self.split_begs[i], self.split_ends[i] = \
            self.calculate_axis_split_extents(
                self.split_shape[i],
                self.array_shape[i]
            )
def calculate_split_by_split_size(self):
    """
    Computes the split defined by :attr:`split_size`
    and :attr:`split_num_slices_per_axis`.

    The extents are set first (:meth:`set_split_extents_by_split_size`)
    and the split is then generated from those extents.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       where each element is a :obj:`tuple` of :obj:`slice` objects.
    """
    self.set_split_extents_by_split_size()
    split = self.calculate_split_from_extents()
    return split
def set_split_extents_by_tile_shape(self):
    """
    Sets split shape :attr:`split_shape` and
    split extents (:attr:`split_begs` and :attr:`split_ends`)
    from value of :attr:`tile_shape`.

    Tiles start at multiples of the tile shape, the last tile along
    an axis is truncated at the end of the axis. A zero length axis
    produces a single empty tile (:samp:`beg == end == 0`) rather
    than failing on an empty extent array.
    """
    num_axes = len(self.array_shape)

    # Number of tiles per axis is ceil(array_shape / tile_shape), with a
    # minimum of one tile so that zero length axes remain representable.
    self.split_shape = \
        _np.maximum(((self.array_shape - 1) // self.tile_shape) + 1, 1)
    self.split_begs = [[], ] * num_axes
    self.split_ends = [[], ] * num_axes
    for i in range(num_axes):
        begs = _np.arange(0, self.array_shape[i], self.tile_shape[i])
        if len(begs) == 0:
            # Zero length axis, a single tile which starts (and stops) at zero.
            begs = _np.zeros((1,), dtype=begs.dtype)
        # A tile stops where its successor starts, the last tile stops
        # at the end of the axis.
        ends = _np.zeros_like(begs)
        ends[0:-1] = begs[1:]
        ends[-1] = self.array_shape[i]
        self.split_begs[i] = begs
        self.split_ends[i] = ends
def calculate_split_by_tile_shape(self):
    """
    Computes the split defined by :attr:`tile_shape`.

    The extents are set first (:meth:`set_split_extents_by_tile_shape`)
    and the split is then generated from those extents.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       where each element is a :obj:`tuple` of :obj:`slice` objects.
    """
    self.set_split_extents_by_tile_shape()
    split = self.calculate_split_from_extents()
    return split
def set_split_extents_by_tile_max_bytes(self):
    """
    Sets :attr:`tile_shape` from the :attr:`max_tile_bytes` restriction
    (together with :attr:`max_tile_shape`, :attr:`sub_tile_shape`
    and :attr:`halo`) and then sets the split extents (:attr:`split_begs`
    and :attr:`split_ends`) from that tile shape.
    """
    tile_shape_params = dict(
        array_shape=self.array_shape,
        array_itemsize=self.array_itemsize,
        max_tile_bytes=self.max_tile_bytes,
        max_tile_shape=self.max_tile_shape,
        sub_tile_shape=self.sub_tile_shape,
        halo=self.halo,
    )
    self.tile_shape = calculate_tile_shape_for_max_bytes(**tile_shape_params)

    # With the tile shape fixed, this is now a plain split by tile shape.
    self.set_split_extents_by_tile_shape()
def calculate_split_by_tile_max_bytes(self):
    """
    Computes the split defined by :attr:`max_tile_bytes`
    (and :attr:`max_tile_shape`, :attr:`sub_tile_shape`, :attr:`halo`).

    The extents are set first (:meth:`set_split_extents_by_tile_max_bytes`)
    and the split is then generated from those extents.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       where each element is a :obj:`tuple` of :obj:`slice` objects.
    """
    self.set_split_extents_by_tile_max_bytes()
    split = self.calculate_split_from_extents()
    return split
def set_split_extents(self):
    """
    Sets split extents (:attr:`split_begs` and :attr:`split_ends`)
    using whichever split parameters were set from :meth:`__init__`.

    The parameters are checked and the tile bounds updated first.
    The split method is then selected by the first of the following
    which is not :samp:`None`: :attr:`indices_per_axis`, :attr:`split_size`
    or :attr:`split_num_slices_per_axis`, :attr:`tile_shape`,
    :attr:`max_tile_bytes`. No extents are set if all of them
    are :samp:`None`.
    """
    self.check_split_parameters()
    self.update_tile_extent_bounds()

    if self.indices_per_axis is not None:
        self.set_split_extents_by_indices_per_axis()
        return

    if not ((self.split_size is None) and (self.split_num_slices_per_axis is None)):
        self.set_split_extents_by_split_size()
        return

    if self.tile_shape is not None:
        self.set_split_extents_by_tile_shape()
        return

    if self.max_tile_bytes is not None:
        self.set_split_extents_by_tile_max_bytes()
def calculate_split(self):
    """
    Computes the split.

    The extents are set first (:meth:`set_split_extents`) and
    the split is then generated from those extents.

    :rtype: :obj:`numpy.ndarray`
    :return:
       A :mod:`numpy` `structured array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
       of dimension :samp:`len({self}.array_shape)`.
       Each element of the returned array is a :obj:`tuple`
       containing :samp:`len({self}.array_shape)` elements, with each element
       being a :obj:`slice` object. Each :obj:`tuple` defines a slice within
       the bounds :samp:`{self}.array_start - {self}.halo[:, 0]`
       to :samp:`{self}.array_start + {self}.array_shape + {self}.halo[:, 1]`.
    """
    self.set_split_extents()
    split = self.calculate_split_from_extents()
    return split
# Build the docstring of ShapeSplitter.__init__ from the parameter
# documentation templates which are shared with shape_split and array_split.
# A throw-away ShapeSplitter([0, ]) instance is created so that the function
# object underlying the bound __init__ method is reached via __func__.
# NOTE(review): presumably done this way so that the assignment also works
# on Python 2 - confirm before simplifying.
ShapeSplitter([0, ]).__init__.__func__.__doc__ = \
"""
Initialises parameters which define a split.
%s
%s
.. seealso:: :ref:`array_split-examples`
""" % (
# First %s: documentation of the array_shape parameter.
_array_shape_param_doc,
# Second %s: documentation of the remaining parameters, which is itself
# a template with four slots.
(
_ShapeSplitter__init__params_doc
%
(
_array_start_param_doc,
"\n" + _array_itemsize_param_doc,
_halo_param_doc,
_array_tile_bounds_policy_param_doc,
)
)
)
def shape_split(array_shape, *args, **kwargs):
    # The docstring is assigned to shape_split.__doc__ after this definition.
    # All arguments are forwarded unchanged to the ShapeSplitter constructor,
    # the split itself is computed by ShapeSplitter.calculate_split.
    splitter = ShapeSplitter(array_shape, *args, **kwargs)
    return splitter.calculate_split()
# Build the docstring of shape_split from the parameter documentation
# templates, the same templates are used for the ShapeSplitter.__init__
# docstring.
shape_split.__doc__ =\
"""
Splits specified :samp:`{array_shape}` in tiles, returns array of :obj:`slice` tuples.
%s
%s
:rtype: :obj:`numpy.ndarray`
:return: Array of :obj:`tuple` objects. Each :obj:`tuple` element
is a :obj:`slice` object so that each :obj:`tuple` defines
a multi-dimensional slice of an array of shape :samp:`{array_shape}`.
.. seealso:: :func:`array_split.array_split`, :meth:`array_split.ShapeSplitter`,
:ref:`array_split-examples`
""" % (
# First %s: documentation of the array_shape parameter.
_array_shape_param_doc,
# Second %s: documentation of the remaining parameters, which is itself
# a template with four slots.
(
_ShapeSplitter__init__params_doc
%
(
_array_start_param_doc,
"\n" + _array_itemsize_param_doc,
_halo_param_doc,
_array_tile_bounds_policy_param_doc,
)
)
)
def array_split(
    ary,
    indices_or_sections=None,
    axis=None,
    tile_shape=None,
    max_tile_bytes=None,
    max_tile_shape=None,
    sub_tile_shape=None,
    halo=None
):
    # The docstring is assigned to array_split.__doc__ after this definition.
    # The split is computed for the shape of ary, with the tiles confined to
    # the array bounds (ARRAY_BOUNDS) so that every tile is a valid index
    # into ary.
    split = \
        shape_split(
            array_shape=ary.shape,
            indices_or_sections=indices_or_sections,
            axis=axis,
            array_start=None,
            array_itemsize=ary.itemsize,
            tile_shape=tile_shape,
            max_tile_bytes=max_tile_bytes,
            max_tile_shape=max_tile_shape,
            sub_tile_shape=sub_tile_shape,
            halo=halo,
            tile_bounds_policy=ARRAY_BOUNDS
        )

    # Each element of the flattened split is a structured array record,
    # convert it to a real tuple of slice objects before indexing: numpy
    # only interprets a tuple as a multi-dimensional index, other sequence
    # types are treated as an index array.
    return [ary[tuple(slyce)] for slyce in split.flatten()]
# Build the docstring of array_split from the shared parameter
# documentation template.
array_split.__doc__ =\
"""
Splits the specified array :samp:`{ary}` into sub-arrays, returns list of :obj:`numpy.ndarray`.
:type ary: :obj:`numpy.ndarray`
:param ary: Array which is split into sub-arrays.
%s
:rtype: :obj:`list`
:return: List of :obj:`numpy.ndarray` elements, where each element is
a *slice* from :samp:`{ary}` (potentially an empty slice).
.. seealso:: :func:`array_split.shape_split`, :meth:`array_split.ShapeSplitter`,
:ref:`array_split-examples`
""" % (
_ShapeSplitter__init__params_doc
%
(
# The template slots which shape_split.__doc__ fills with the array_start,
# array_itemsize and tile_bounds_policy documentation are left empty,
# array_split does not accept those parameters.
"",
"",
# The halo documentation is reworded in terms of the ary argument.
_halo_param_doc.replace("len({array_shape})", "len({ary}.shape)"),
""
)
)
# Public API: every module level name defined so far which does not start
# with an underscore.
# NOTE(review): this also picks up "absolute_import" from the __future__
# import at the top of the module - confirm whether that is intended.
__all__ = [s for s in dir() if not s.startswith('_')]