From e72fc4cc7c1192ed171cdfb53816a79c890bb94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 27 Mar 2026 16:27:04 +0100 Subject: [PATCH 01/28] Revert "FEAT/CLN: generalize set_labels to support groups as keys and "expressions" as labels" This reverts commit c7e583631d191fd9ec76af9ed8229e64ce7b743b. --- doc/source/api.rst | 3 +- doc/source/changes/version_0_36.rst.inc | 24 +-- larray/core/array.py | 38 ++--- larray/core/axis.py | 197 +++++++++++------------- larray/core/group.py | 103 ++++++------- larray/inout/pandas.py | 2 +- larray/tests/test_array.py | 14 +- larray/tests/test_axis.py | 14 -- 8 files changed, 168 insertions(+), 227 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index a8327f90f..73e13c8c7 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -64,7 +64,8 @@ Modifying/Selecting Axis.rename Axis.extend Axis.insert - Axis.set_labels + Axis.replace + Axis.apply Axis.union Axis.intersection Axis.difference diff --git a/doc/source/changes/version_0_36.rst.inc b/doc/source/changes/version_0_36.rst.inc index 6820fb727..a23f5c467 100644 --- a/doc/source/changes/version_0_36.rst.inc +++ b/doc/source/changes/version_0_36.rst.inc @@ -4,9 +4,6 @@ Syntax changes ^^^^^^^^^^^^^^ -* ``Axis.apply()`` and ``Axis.replace()`` are deprecated in favor of - :py:obj:`Axis.set_labels()`. - * renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). * renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. @@ -52,26 +49,7 @@ New features Miscellaneous improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ -* :py:obj:`Array.set_labels()` and :py:obj:`Axis.set_labels()` (formerly - ``Axis.replace()`` and ``Axis.apply()``) now accepts slices, Groups or - selection strings as the labels to change, and callable and - "creation strings" as the new labels. 
This makes it easier to change - only a subset of labels or to change several labels in the same way - (closes :issue:`906`). - - >>> arr = ndtest((2, 3)) - >>> arr - a\b b0 b1 b2 - a0 0 1 2 - a1 3 4 5 - >>> arr.set_labels({'b1:': str.upper, 'a1': 'A-ONE'}) - a\b b0 B1 B2 - a0 0 1 2 - A-ONE 3 4 5 - >>> arr.set_labels('b1:', 'B1..B2') - a\b b0 B1 B2 - a0 0 1 2 - a1 3 4 5 +* improved something. Fixes diff --git a/larray/core/array.py b/larray/core/array.py index 2dcc2e686..a88920346 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -283,7 +283,7 @@ def concat(arrays, axis=0, dtype=None): # switch to object dtype if labels are of incompatible types, so that we do not implicitly convert numeric types to # strings (numpy should not do this in the first place but that is another story). This can happen for example when - # we want to add a "total" label to a numeric axis (eg age). + # we want to add a "total" tick to a numeric axis (eg age). combined_axis = Axis(concatenate_ndarrays(arrays_labels), name) # combine all axes (using labels from any side if any) @@ -2147,7 +2147,7 @@ def sort_values(self, key=None, axis=None, ascending=True) -> 'Array': # 1 2 0, axis='nat') # which sorts the *data* correctly, but the labels on the nat axis are not sorted # (because the __getitem__ in that case reuse the key axis as-is -- like it should). - # Both use cases have value, but I think reordering the labels should be the default. + # Both use cases have value, but I think reordering the ticks should be the default. # Now, I am unsure where to change this. Probably in IGroupMaker.__getitem__, # but then how do I get the "not reordering labels" behavior that I have now? # FWIW, using .data, I get IGroup([1, 2, 0], axis='nat'), which works. 
@@ -2684,19 +2684,19 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam # get list of labels for each axis (except the last one if wide=True) labels = [ensure_no_numpy_type(axis.labels) for axis in axes] - # creates vertical lines (labels is a list of list) + # creates vertical lines (ticks is a list of list) if self.ndim == 1 and wide: if dump_axes_names is True: # There is no vertical axis, so the axis name should not have - # any "label" below it and we add an empty "label". - labels = [['']] + # any "tick" below it and we add an empty "tick". + ticks = [['']] else: # There is no vertical axis but no axis name either - labels = [[]] + ticks = [[]] elif light: - labels = light_product(*labels) + ticks = light_product(*labels) else: - labels = Product(labels) + ticks = Product(labels) # computes the first line other_colnames = ensure_no_numpy_type(self.axes[-1].labels) if wide else [value_name] @@ -2706,14 +2706,14 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam if maxlines != -1 and height > maxlines: # replace middle lines of the table by '...'. # We show only the first and last edgeitems lines. 
- res2d.extend([list(label) + dataline - for label, dataline in zip(labels[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))]) + res2d.extend([list(tick) + dataline + for tick, dataline in zip(ticks[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))]) res2d.append(["..."] * (self.ndim - 1 + width)) - res2d.extend([list(label) + dataline - for label, dataline in zip(labels[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))]) + res2d.extend([list(tick) + dataline + for tick, dataline in zip(ticks[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))]) else: # all other lines (labels of N-1 first axes + data) - res2d.extend([list(label) + ensure_no_numpy_type(dataline) for label, dataline in zip(labels, data)]) + res2d.extend([list(tick) + ensure_no_numpy_type(dataline) for tick, dataline in zip(ticks, data)]) if na_repr != 'as_is': res2d = [[na_repr if value != value else value @@ -7513,6 +7513,7 @@ def __array__(self, dtype=None, copy=None): __array_priority__ = 100 + # TODO: this should be a thin wrapper around a method in AxisCollection def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'Array': r"""Replace the labels of one or several axes of the array. 
@@ -7610,18 +7611,13 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'Array' nat\sex Men F Belgian 0 1 FO 2 3 - - >>> a.set_labels({'M:F': str.lower, 'BE': 'Belgian', 'FO': 'Foreigner'}) - nat\sex m f - Belgian 0 1 - Foreigner 2 3 """ - new_axes = self.axes.set_labels(axis, labels, **kwargs) + axes = self.axes.set_labels(axis, labels, **kwargs) if inplace: - self.axes = new_axes + self.axes = axes return self else: - return Array(self.data, new_axes) + return Array(self.data, axes) def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True) -> 'Array': return Array(self.data.astype(dtype, order, casting, subok, copy), self.axes) diff --git a/larray/core/axis.py b/larray/core/axis.py index 9941982e0..ed1c52b8f 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -11,9 +11,8 @@ from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCArray from larray.core.expr import ExprNode -from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_label, _to_labels, _to_key, _seq_summary, - _idx_seq_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups, - _to_label_or_labels) +from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary, + _idx_seq_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups) from larray.util.oset import OrderedSet from larray.util.misc import (duplicates, array_lookup2, ReprString, index_by_id, renamed_to, LHDFStore, lazy_attribute, _isnoneslice, unique_list, unique_multi, Product, argsort, has_duplicates, @@ -116,15 +115,15 @@ def _retarget_warn_msg(key, real_axis, current_eval=None, future_eval=None): "Using a Group object which was used to create an aggregate to " \ "target its aggregated label is deprecated. " \ "Please use the aggregated label directly instead. 
" \ - "In this case, you should use {potential_label!r} instead of " \ + "In this case, you should use {potential_tick!r} instead of " \ "using {key!r}." -def _group_as_aggregated_label_msg(key, potential_label=None): - if potential_label is None: - potential_label = _to_label(key) +def _group_as_aggregated_label_msg(key, potential_tick=None): + if potential_tick is None: + potential_tick = _to_tick(key) return _GROUP_AS_AGGREGATED_LABEL_MSG_TEMPLATE.format( - potential_label=potential_label, + potential_tick=potential_tick, key=key ) @@ -305,7 +304,7 @@ def labels(self, labels): labels = np.arange(length) iswildcard = True else: - labels = _to_labels(labels, parse_single_int=True) + labels = _to_ticks(labels, parse_single_int=True) length = len(labels) iswildcard = False @@ -976,8 +975,8 @@ def _ipython_key_completions_(self) -> List[Scalar]: return list(self.labels) def __contains__(self, key) -> bool: - # TODO: ideally, _to_label shouldn't be necessary, the __hash__ and __eq__ of Group should include this - return _to_label(key) in self._mapping + # TODO: ideally, _to_tick shouldn't be necessary, the __hash__ and __eq__ of Group should include this + return _to_tick(key) in self._mapping # use the default hash. 
We have to specify it explicitly because we define __eq__ __hash__ = object.__hash__ @@ -1013,9 +1012,6 @@ def index(self, key) -> Union[int, np.ndarray, slice]: 3 >>> people.index(people.containing('Bruce')) array([1, 2]) - >>> a = Axis('a0..a5', 'a') - >>> a.index('a1,a3,a2..a4') - array([1, 3, 2, 3, 4]) """ mapping = self._mapping @@ -1031,17 +1027,18 @@ def index(self, key) -> Union[int, np.ndarray, slice]: # TODO: remove this as it is potentially very expensive # if key.key is an array or list and should be tried # as a last resort - potential_label = _to_label(key) + potential_tick = _to_tick(key) + # avoid matching 0 against False or 0.0, note that None has # object dtype and so always pass this test - if self._is_key_type_compatible(potential_label): + if self._is_key_type_compatible(potential_tick): try: - res_idx = mapping[potential_label] - if potential_label != key.key: + res_idx = mapping[potential_tick] + if potential_tick != key.key: raise ValueError( _group_as_aggregated_label_msg( key, - potential_label + potential_tick ) ) return res_idx @@ -1053,7 +1050,7 @@ def index(self, key) -> Union[int, np.ndarray, slice]: pass if isinstance(key, str): - # try the key as-is to allow getting at labels with special characters (",", ":", ...) + # try the key as-is to allow getting at ticks with special characters (",", ":", ...) try: # avoid matching 0 against False or 0.0, note that Group keys have object dtype and so always pass this # test @@ -1249,35 +1246,16 @@ def copy(self) -> 'Axis': new_axis.__sorted_values = self.__sorted_values return new_axis - def set_labels(self, old_or_changes, new=None) -> 'Axis': + def replace(self, old, new=None) -> 'Axis': r""" - Return a new axis with some labels changed. 
- - It supports three distinct syntax variants: - - * Axis.set_labels(new_labels) -> replace all Axis labels by `new_labels` - * Axis.set_labels(label_selection, new_labels) -> replace selection of labels by `new_labels` - * Axis.set_labels({old1: new1, old2: new2}) -> replace each selection of labels by corresponding new labels - - Additionally, new labels in any of the above forms can be a function which transforms the existing - labels to produce the actual new labels. + Return a new axis with some labels replaced. Parameters ---------- - old_or_changes : any scalar (bool, int, str, ...), tuple/list/array of scalars, Group, callable or mapping. - This can be either: - - * A selection of label(s) to be replaced. This can take several forms: - - a single label (e.g. 'France') - - a list of labels (e.g. ['France', 'Germany']) - - a comma-separated string of labels (e.g. 'France,Germany') - - a Group (e.g. country['France']) - * A mapping {selection1: new_labels1, selection2: new_labels2, ...} - * New labels, in which case all the axis labels will be replaced by these new labels and - the `new` argument must not be used. - new : any scalar (bool, int, str, ...) or tuple/list/array of scalars or callable, optional - The new label(s) or function to apply to old labels to get the new labels. This is argument must not be - used if `old_or_changes` contains the new labels or if it is a mapping. + old : any scalar (bool, int, str, ...), tuple/list/array of scalars, or a mapping. + the label(s) to be replaced. Old can be a mapping {old1: new1, old2: new2, ...} + new : any scalar (bool, int, str, ...) or tuple/list/array of scalars, optional + the new label(s). This is argument must not be used if old is a mapping. 
Returns ------- @@ -1286,54 +1264,55 @@ def set_labels(self, old_or_changes, new=None) -> 'Axis': Examples -------- - >>> country = Axis('country=be,de,fr') - >>> country - Axis(['be', 'de', 'fr'], 'country') - >>> country.set_labels('be', 'Belgium') - Axis(['Belgium', 'de', 'fr'], 'country') - >>> country.set_labels({'de': 'Germany', 'fr': 'France'}) - Axis(['be', 'Germany', 'France'], 'country') - >>> country.set_labels(['be', 'fr'], ['Belgium', 'France']) - Axis(['Belgium', 'de', 'France'], 'country') - >>> country.set_labels('be,de', 'Belgium-Germany') - Axis(['Belgium-Germany', 'Belgium-Germany', 'fr'], 'country') - >>> country.set_labels('be,de', ['Belgium', 'Germany']) - Axis(['Belgium', 'Germany', 'fr'], 'country') - >>> country.set_labels(str.upper) - Axis(['BE', 'DE', 'FR'], 'country') - """ - # FIXME: compute max(length of new keys and old labels array) instead - # XXX: it might be easier to go via list to get the label type auto-detection - # labels = self.labels.tolist() - - # using object dtype because new labels length can be larger than the fixed str length in self.labels - labels = self.labels.astype(object) - get_indices = self.index - - def apply_changes(selection, label_change): - old_indices = get_indices(selection) - if callable(label_change): - old_labels = labels[old_indices] - if isinstance(old_labels, np.ndarray): - np_func = np_frompyfunc(label_change, 1, 1) - new_labels = np_func(old_labels) - else: - new_labels = label_change(old_labels) - else: - new_labels = _to_label_or_labels(label_change) - labels[old_indices] = new_labels - - if new is None and not isinstance(old_or_changes, dict): - apply_changes(slice(None), old_or_changes) - elif new is not None: - apply_changes(old_or_changes, new) + >>> sex = Axis('sex=M,F') + >>> sex + Axis(['M', 'F'], 'sex') + >>> sex.replace('M', 'Male') + Axis(['Male', 'F'], 'sex') + >>> sex.replace({'M': 'Male', 'F': 'Female'}) + Axis(['Male', 'Female'], 'sex') + >>> sex.replace(['M', 'F'], ['Male', 
'Female']) + Axis(['Male', 'Female'], 'sex') + """ + if isinstance(old, dict): + new = list(old.values()) + old = list(old.keys()) + elif np.isscalar(old): + assert new is not None and np.isscalar(new), f"{new} is not a scalar but a {type(new).__name__}" + old = [old] + new = [new] else: - assert new is None and isinstance(old_or_changes, dict) - for old, new in old_or_changes.items(): - apply_changes(old, new) + seq = (tuple, list, np.ndarray) + assert isinstance(old, seq), f"{old} is not a sequence but a {type(old).__name__}" + assert isinstance(new, seq), f"{new} is not a sequence but a {type(new).__name__}" + assert len(old) == len(new) + # using object dtype because new labels length can be larger than the fixed str length in the self.labels array + labels = self.labels.astype(object) + indices = self.index(old) + labels[indices] = new return Axis(labels, self.name) - apply = renamed_to(set_labels, 'apply') - replace = renamed_to(set_labels, 'replace') + + def apply(self, func) -> 'Axis': + r""" + Return a new axis with the labels transformed by func. + + Parameters + ---------- + func : callable + A callable which takes a single argument and returns a single value. + + Returns + ------- + Axis + a new Axis with the transformed labels. + + Examples + -------- + >>> sex = Axis('sex=MALE,FEMALE') + >>> sex.apply(str.capitalize) + Axis(['Male', 'Female'], 'sex') + """ + return Axis(np_frompyfunc(func, 1, 1)(self.labels), self.name) # XXX: rename to named like Group? def rename(self, name) -> 'Axis': @@ -1342,7 +1321,7 @@ def rename(self, name) -> 'Axis': Parameters ---------- - name : str, Axis + name : str the new name for the axis. 
Returns @@ -1395,7 +1374,7 @@ def union(self, other) -> 'Axis': Axis(['a0', 'a1', 'a2', 'a3'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_labels(other) + other = [other] if non_string_scalar else _to_ticks(other) return Axis(unique_multi((self.labels, other)), self.name) def intersection(self, other) -> 'Axis': @@ -1430,7 +1409,7 @@ def intersection(self, other) -> 'Axis': Axis(['a0', 'a1', 'a0'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_labels(other) + other = [other] if non_string_scalar else _to_ticks(other) to_keep = set(other) return Axis([label for label in self.labels if label in to_keep], self.name) @@ -1464,7 +1443,7 @@ def difference(self, other) -> 'Axis': Axis(['a0'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_labels(other) + other = [other] if non_string_scalar else _to_ticks(other) to_drop = set(other) return Axis([label for label in self.labels if label not in to_drop], self.name) @@ -2091,7 +2070,7 @@ def isaxis(self, value) -> bool: >>> col.isaxis('c') False """ - # this is tricky. 0 and 1 can be both axes indices and axes labels. + # this is tricky. 0 and 1 can be both axes indices and axes ticks. # not sure what's worse: # 1) disallow aggregates(axis_num): users could still use arr.sum(arr.axes[0]) # we could also provide an explicit kwarg (ie this would effectively forbid having an axis named "axis"). @@ -2100,13 +2079,13 @@ def isaxis(self, value) -> bool: return isinstance(value, Axis) or (isinstance(value, str) and value in self) # 2) slightly inconsistent API: allow aggregate over single labels if they are string, but not int # arr.sum(0) would sum on the first axis, but arr.sum('M') would - # sum a single label. I don't like this option. - # 3) disallow single label aggregates. 
Single labels make little sense in the context of an aggregate, + # sum a single tick. I don't like this option. + # 3) disallow single tick aggregates. Single labels make little sense in the context of an aggregate, # but you don't always know/want to differenciate the code in that case anyway. # It would be annoying for e.g. Brussels # 4) give priority to axes, # arr.sum(0) would sum on the first axis but arr.sum(5) would - # sum a single label (assuming there is a int axis and less than six axes). + # sum a single tick (assuming there is a int axis and less than six axes). # return value in self def __len__(self) -> int: @@ -2765,14 +2744,25 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'AxisCo # handle {label1: new_label1, label2: new_label2} if any(axis_ref not in self for axis_ref in changes.keys()): changes_per_axis = defaultdict(list) - for selection, label_changes in changes.items(): + for selection, new_labels in changes.items(): group = self._guess_axis(selection) axis = group.axis - changes_per_axis[axis].append((group, label_changes)) + changes_per_axis[axis].append((selection, new_labels)) changes = {axis: dict(axis_changes) for axis, axis_changes in changes_per_axis.items()} - return self.replace({old_axis: self[old_axis].set_labels(axis_changes) for old_axis, axis_changes in - changes.items()}, inplace=inplace) + new_axes = [] + for old_axis, axis_changes in changes.items(): + real_axis = self[old_axis] + if isinstance(axis_changes, dict): + new_axis = real_axis.replace(axis_changes) + # TODO: we should implement the non-dict behavior in Axis.replace, so that we can simplify this code to: + # new_axes = [self[old_axis].replace(axis_changes) for old_axis, axis_changes in changes.items()] + elif callable(axis_changes): + new_axis = real_axis.apply(axis_changes) + else: + new_axis = Axis(axis_changes, real_axis.name) + new_axes.append((real_axis, new_axis)) + return self.replace(new_axes, inplace=inplace) # TODO: deprecate 
method (should use __sub__ instead) def without(self, axes) -> 'AxisCollection': @@ -3755,7 +3745,6 @@ def align(self, *other, join='outer', axes=None) -> Tuple['AxisCollection']: See Also -------- Array.align - Axis.align Examples -------- diff --git a/larray/core/group.py b/larray/core/group.py index 6c49d0661..c8c39ed27 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -345,9 +345,9 @@ def _seq_group_to_name(seq) -> Sequence[Any]: return seq -def _to_label(v) -> Scalar: +def _to_tick(v) -> Scalar: r""" - Convert any value to a label (ie make it hashable, and acceptable as an ndarray element). + Convert any value to a tick (ie makes it hashable, and acceptable as an ndarray element). scalar -> not modified slice -> 'start:stop' @@ -364,7 +364,7 @@ def _to_label(v) -> Scalar: Returns ------- any scalar - scalar representing the label + scalar representing the tick """ # the fact that an "aggregated tick" is passed as a LGroup or as a string should be as irrelevant as possible. 
# The thing is that we cannot (currently) use the more elegant _to_tick(e.key) that means the LGroup is not @@ -374,7 +374,7 @@ def _to_label(v) -> Scalar: if np.isscalar(v): return v elif isinstance(v, Group): - return v.name if v.name is not None else _to_label(v.to_label()) + return v.name if v.name is not None else _to_tick(v.to_label()) elif isinstance(v, slice): return _slice_to_str(v) elif isinstance(v, (tuple, list)): @@ -387,41 +387,7 @@ def _to_label(v) -> Scalar: return str(v) -def _to_label_or_labels(value, parse_single_int=False): - if isinstance(value, ABCAxis): - return value.labels - elif isinstance(value, Group): - # a single LGroup used for all ticks of an Axis - # XXX: unsure _to_ticks() is necessary as s.eval() should return existing labels - # In fact, calling _to_ticks is only necessary because Group keys are not - # checked enough, especially for groups without axis, or with - # AxisReference/string axes - return _to_label_or_labels(value.eval()) - elif isinstance(value, np.ndarray): - # we assume it has already been translated - # XXX: Is it a safe assumption? - return value - if isinstance(value, pd.Index): - return value.values - elif isinstance(value, (list, tuple)): - return [_to_label(v) for v in value] - elif isinstance(value, range): - return value - elif isinstance(value, str): - labels = _seq_str_to_seq(value, parse_single_int=parse_single_int) - if isinstance(labels, slice): - raise ValueError("using : to define axes is deprecated, please use .. instead") - return labels - elif hasattr(value, '__array__'): - return value.__array__() - else: - try: - return list(value) - except TypeError: - raise TypeError(f"ticks must be iterable ({type(value)} is not)") - - -def _to_labels(value, parse_single_int=False) -> Iterable[Scalar]: +def _to_ticks(s, parse_single_int=False) -> Iterable[Scalar]: r""" Make a (list of) value(s) usable as the collection of labels for an Axis (ie hashable). 
@@ -429,7 +395,7 @@ def _to_labels(value, parse_single_int=False) -> Iterable[Scalar]: Parameters ---------- - value : str, list, tuple, range, pd.Index, Axis, Group, + s : iterable List of values usable as the collection of labels for an Axis. Returns @@ -438,23 +404,48 @@ def _to_labels(value, parse_single_int=False) -> Iterable[Scalar]: Examples -------- - >>> list(_to_labels('M , F')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('M , F')) # doctest: +NORMALIZE_WHITESPACE ['M', 'F'] - >>> list(_to_labels('A,C..E,F..G,Z')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('A,C..E,F..G,Z')) # doctest: +NORMALIZE_WHITESPACE ['A', 'C', 'D', 'E', 'F', 'G', 'Z'] - >>> list(_to_labels('U')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('U')) # doctest: +NORMALIZE_WHITESPACE ['U'] - >>> list(_to_labels('..3')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('..3')) # doctest: +NORMALIZE_WHITESPACE [0, 1, 2, 3] - >>> list(_to_labels('01..12')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('01..12')) # doctest: +NORMALIZE_WHITESPACE ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'] - >>> list(_to_labels('01,02,03,10,11,12')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_ticks('01,02,03,10,11,12')) # doctest: +NORMALIZE_WHITESPACE ['01', '02', '03', '10', '11', '12'] """ - labels = _to_label_or_labels(value, parse_single_int=parse_single_int) - if np.isscalar(labels): - labels = [labels] - return np.asarray(labels) + if isinstance(s, ABCAxis): + return s.labels + if isinstance(s, Group): + # a single LGroup used for all ticks of an Axis + return _to_ticks(s.eval()) + elif isinstance(s, np.ndarray): + # we assume it has already been translated + # XXX: Is it a safe assumption? 
+ return s + + if isinstance(s, pd.Index): + ticks = s.values + elif isinstance(s, (list, tuple)): + ticks = [_to_tick(e) for e in s] + elif isinstance(s, range): + ticks = s + elif isinstance(s, str): + seq = _seq_str_to_seq(s, parse_single_int=parse_single_int) + if isinstance(seq, slice): + raise ValueError("using : to define axes is deprecated, please use .. instead") + ticks = [seq] if isinstance(seq, (str, int)) else seq + elif hasattr(s, '__array__'): + ticks = s.__array__() + else: + try: + ticks = list(s) + except TypeError: + raise TypeError(f"ticks must be iterable ({type(s)} is not)") + return np.asarray(ticks) _axis_name_pattern = re.compile(r'\s*(([A-Za-z0-9]\w*)(\.i)?\s*\[)?(.*)') @@ -672,7 +663,7 @@ def _to_keys(value, stack_depth=1) -> Union[Key, Tuple[Key]]: def _translate_sheet_name(sheet_name) -> str: if isinstance(sheet_name, Group): - sheet_name = str(_to_label(sheet_name)) + sheet_name = str(_to_tick(sheet_name)) if isinstance(sheet_name, str): sheet_name = _sheet_name_pattern.sub('_', sheet_name) if len(sheet_name) > 31: @@ -686,7 +677,7 @@ def _translate_sheet_name(sheet_name) -> str: def _translate_group_key_hdf(key) -> str: if isinstance(key, Group): - key = _key_hdf_pattern.sub('_', str(_to_label(key))) + key = _key_hdf_pattern.sub('_', str(_to_tick(key))) return key @@ -711,7 +702,7 @@ def union(*args) -> List[Any]: ['a', 'b', 'c', 'd', 'e', 'f', 0, 1, 2] """ if args: - return unique_list(chain(*(_to_labels(arg) for arg in args))) + return unique_list(chain(*(_to_ticks(arg) for arg in args))) else: return [] @@ -771,7 +762,7 @@ def __init__(self, key, name=None, axis=None): # we do NOT assign a name automatically when missing because that makes it impossible to know whether a name # was explicitly given or not - self.name = _to_label(name) if name is not None else name + self.name = _to_tick(name) if name is not None else name assert axis is None or isinstance(axis, (str, int, ABCAxis)), f"invalid axis '{axis}' ({type(axis).__name__})" 
# we could check the key is valid but this can be slow and could be useless @@ -1537,7 +1528,7 @@ def __hash__(self) -> int: # is a small price to pay if the performance impact is large. # the problem with using self.translate() is that we cannot compare groups without axis # return hash(_to_tick(self.translate())) - return hash(_to_label(self.key)) + return hash(_to_tick(self.key)) def remove_nested_groups(key) -> Any: @@ -1758,7 +1749,7 @@ def eval(self) -> Union[Scalar, Sequence[Scalar]]: raise ValueError("Cannot evaluate a positional group without axis") def __hash__(self): - return hash(('IGroup', _to_label(self.key))) + return hash(('IGroup', _to_tick(self.key))) PGroup = renamed_to(IGroup, 'PGroup', raise_error=True) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index a9f6e08b2..52ab48144 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -18,7 +18,7 @@ def decode(s, encoding='utf-8', errors='strict'): def parse(s): r""" - Used to parse the "folded" axis labels (usually periods). + Used to parse the "folded" axis ticks (usually periods). 
""" # parameters can be strings or numbers if isinstance(s, str): diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 2278201f3..32bf274a9 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -24,7 +24,7 @@ zip_array_values, zip_array_items, nan_to_num ) from larray.core.axis import ( - _to_labels, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg + _to_ticks, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg ) from larray.util.misc import LHDFStore @@ -43,8 +43,8 @@ def test_value_string_split(): - assert_nparray_equal(_to_labels('c0,c1'), np.asarray(['c0', 'c1'])) - assert_nparray_equal(_to_labels('c0, c1'), np.asarray(['c0', 'c1'])) + assert_nparray_equal(_to_ticks('c0,c1'), np.asarray(['c0', 'c1'])) + assert_nparray_equal(_to_ticks('c0, c1'), np.asarray(['c0', 'c1'])) def test_value_string_union(): @@ -52,12 +52,12 @@ def test_value_string_union(): def test_value_string_range(): - assert_nparray_equal(_to_labels('0..15'), np.arange(16)) - assert_nparray_equal(_to_labels('..15'), np.arange(16)) + assert_nparray_equal(_to_ticks('0..15'), np.arange(16)) + assert_nparray_equal(_to_ticks('..15'), np.arange(16)) with must_raise(ValueError, "no stop bound provided in range: '10..'"): - _to_labels('10..') + _to_ticks('10..') with must_raise(ValueError, "no stop bound provided in range: '..'"): - _to_labels('..') + _to_ticks('..') # ================ # diff --git a/larray/tests/test_axis.py b/larray/tests/test_axis.py index 5005cbe7f..f023486c4 100644 --- a/larray/tests/test_axis.py +++ b/larray/tests/test_axis.py @@ -811,19 +811,5 @@ def test_split(): assert b.equals(Axis(['b0', 'b1', 'b2'])) -def test_apply(): - sex = Axis('sex=MALE,FEMALE') - with must_warn(FutureWarning, msg="apply() is deprecated. 
Use set_labels() instead."): - res = sex.apply(str.capitalize) - assert res.equals(Axis(['Male', 'Female'], 'sex')) - - -def test_replace(): - sex = Axis('sex=M,F') - with must_warn(FutureWarning, msg="replace() is deprecated. Use set_labels() instead."): - res = sex.replace('M', 'Male') - assert res.equals(Axis(['Male', 'F'], 'sex')) - - if __name__ == "__main__": pytest.main() From 0369ffed2b18a082337c8cdb8ea03210724b8726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 27 Mar 2026 16:27:21 +0100 Subject: [PATCH 02/28] Revert "bump version to 0.36-dev" so that I can rebase 0.35.1 on master and avoid making the release from a branch, which I am unsure the release script still supports This reverts commit 931442765d6e118995de9f53b94b97d7e5ee135d. --- doc/source/changes.rst | 14 ------ doc/source/changes/version_0_36.rst.inc | 58 ------------------------- larray/__init__.py | 2 +- pyproject.toml | 2 +- 4 files changed, 2 insertions(+), 74 deletions(-) delete mode 100644 doc/source/changes/version_0_36.rst.inc diff --git a/doc/source/changes.rst b/doc/source/changes.rst index f53ee9a5c..410d88c99 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -1,20 +1,6 @@ Change log ########## -Version 0.36 -============ - -In development. - -CORE ----- -.. include:: ./changes/version_0_36.rst.inc - -EDITOR ------- -.. include:: ./changes/editor/version_0_36.rst.inc - - Version 0.35 ============ diff --git a/doc/source/changes/version_0_36.rst.inc b/doc/source/changes/version_0_36.rst.inc deleted file mode 100644 index a23f5c467..000000000 --- a/doc/source/changes/version_0_36.rst.inc +++ /dev/null @@ -1,58 +0,0 @@ -.. py:currentmodule:: larray - - -Syntax changes -^^^^^^^^^^^^^^ - -* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). - -* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. 
- - -Backward incompatible changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -* other backward incompatible changes - - -New features -^^^^^^^^^^^^ - -* added a feature (see the :ref:`miscellaneous section ` for details). It works on :ref:`api-axis` and - :ref:`api-group` objects. - - Here is an example of the new feature: - - >>> arr = ndtest((2, 3)) - >>> arr - a\b b0 b1 b2 - a0 0 1 2 - a1 3 4 5 - - And it can also be used like this: - - >>> arr = ndtest("a=a0..a2") - >>> arr - a a0 a1 a2 - 0 1 2 - -* added another feature in the editor (closes :editor_issue:`1`). - - .. note:: - - - It works for foo bar ! - - It does not work for foo baz ! - - -.. _misc: - -Miscellaneous improvements -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -* improved something. - - -Fixes -^^^^^ - -* fixed something (closes :issue:`1`). diff --git a/larray/__init__.py b/larray/__init__.py index 8efb9536d..1377bf777 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.36-dev' +__version__ = '0.35' from larray.core.axis import Axis, AxisCollection, X diff --git a/pyproject.toml b/pyproject.toml index 9a6186918..2603fb542 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ [project] name = "larray" -version = "0.36-dev" +version = "0.35" description = "N-D labeled arrays in Python" readme = { file = "README.rst", content-type = "text/x-rst" } From 755397f037ece14ac7c36447c2887c1e96542722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 17:36:50 +0100 Subject: [PATCH 03/28] bump version to 0.35.1-dev --- doc/source/changes.rst | 14 ++++++ doc/source/changes/version_0_35_1.rst.inc | 58 +++++++++++++++++++++++ larray/__init__.py | 2 +- pyproject.toml | 2 +- 4 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 doc/source/changes/version_0_35_1.rst.inc diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 410d88c99..b9e1ab3ce 100644 --- a/doc/source/changes.rst +++ 
b/doc/source/changes.rst @@ -1,6 +1,20 @@ Change log ########## +Version 0.35.1 +============== + +In development. + +CORE +---- +.. include:: ./changes/version_0_35_1.rst.inc + +EDITOR +------ +.. include:: ./changes/editor/version_0_35_1.rst.inc + + Version 0.35 ============ diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc new file mode 100644 index 000000000..a23f5c467 --- /dev/null +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -0,0 +1,58 @@ +.. py:currentmodule:: larray + + +Syntax changes +^^^^^^^^^^^^^^ + +* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). + +* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. + + +Backward incompatible changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* other backward incompatible changes + + +New features +^^^^^^^^^^^^ + +* added a feature (see the :ref:`miscellaneous section ` for details). It works on :ref:`api-axis` and + :ref:`api-group` objects. + + Here is an example of the new feature: + + >>> arr = ndtest((2, 3)) + >>> arr + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + + And it can also be used like this: + + >>> arr = ndtest("a=a0..a2") + >>> arr + a a0 a1 a2 + 0 1 2 + +* added another feature in the editor (closes :editor_issue:`1`). + + .. note:: + + - It works for foo bar ! + - It does not work for foo baz ! + + +.. _misc: + +Miscellaneous improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* improved something. + + +Fixes +^^^^^ + +* fixed something (closes :issue:`1`). 
diff --git a/larray/__init__.py b/larray/__init__.py index 1377bf777..b3b30ecf8 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.35' +__version__ = '0.35.1-dev' from larray.core.axis import Axis, AxisCollection, X diff --git a/pyproject.toml b/pyproject.toml index 2603fb542..2736f7b5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ [project] name = "larray" -version = "0.35" +version = "0.35.1-dev" description = "N-D labeled arrays in Python" readme = { file = "README.rst", content-type = "text/x-rst" } From 7a61e1f68a9b7fba3748790c3f634910172d0a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 3 Mar 2026 16:20:22 +0100 Subject: [PATCH 04/28] DOC: added note about returning NotImplemented for eq and ne binary ops --- larray/core/array.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index a88920346..af26f1ba8 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -5675,10 +5675,15 @@ def opmethod(self, other) -> 'Array': opmethod.__name__ = fullname return opmethod - __lt__ = _binop('lt') - __le__ = _binop('le') + # We use the normal _binop even for eq and ne, which will return + # NotImplemented for unknown types instead of returning False or True + # directly (which would be more efficient) because that is the right thing + # to do: it leaves the possibility open for the other type to implement + # equality with Arrays __eq__ = _binop('eq') __ne__ = _binop('ne') + __lt__ = _binop('lt') + __le__ = _binop('le') __gt__ = _binop('gt') __ge__ = _binop('ge') __add__ = _binop('add') From 52a448a63025f802534a68fbcce545fc3c3ddd5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 10:38:46 +0100 Subject: [PATCH 05/28] FIX: added warning for unsafe casting in CheckedSession (fixes #1168) had to rework the interface between Session and CheckedSession 
--- doc/source/changes/version_0_35_1.rst.inc | 26 ++++- larray/core/checked.py | 126 +++++++++++++++------- larray/core/session.py | 113 +++++++++---------- larray/tests/test_checked_session.py | 69 +++++++++--- 4 files changed, 221 insertions(+), 113 deletions(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index a23f5c467..ff30a757c 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -49,7 +49,31 @@ New features Miscellaneous improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ -* improved something. +* added warning when doing unsafe implicit type conversions (for example from + float to integers) when setting CheckedSession fields (closes :issue:`1168`). + + >>> a = Axis('a=a0,a1') + >>> class MySession(CheckedSession): + ... my_field: CheckedArray(a, dtype=int) + >>> good_value = ndtest(a) + >>> good_value + a a0 a1 + 0 1 + >>> s = MySession(my_field=good_value) + >>> bad_value = good_value * 1.6 + >>> bad_value + a a0 a1 + 0.0 1.6 + >>> s.my_field = bad_value + Array 'my_field' was declared with dtype int64 but got value with float64 dtype. + + It will be converted to int64 like in previous larray versions but this is + not a safe operation (some information could be lost in the conversion). + + If you want to keep doing this conversion and silence this warning, please + convert the value explicitly using value.astype(). + + This warning will become an error in a future version of larray. Fixes diff --git a/larray/core/checked.py b/larray/core/checked.py index 89f2ddf3d..6b0f2e9b0 100644 --- a/larray/core/checked.py +++ b/larray/core/checked.py @@ -61,6 +61,19 @@ def __init__(self, *args, **kwargs): # TypeAliasType, # from `typing_extensions` ) + UNSAFE_CAST_WARNING_TEMPLATE = """ +Array '{name}' was declared with dtype {expected_dtype} but got value with \ +{value_dtype} dtype. 
+ +It will be converted to {expected_dtype} like in previous larray versions but \ +this is +not a safe operation (some information could be lost in the conversion). + +If you want to keep doing this conversion and silence this warning, please +convert the value explicitly using value.astype(). + +This warning will become an error in a future version of larray.""" + def CheckedArray(axes: AxisCollection, dtype: np.dtype = float) -> Type[Array]: """ @@ -113,6 +126,17 @@ def validate_array(value: Any, info: ValidationInfo) -> Array: value = value.expand(expected_axes) # check dtype if value.dtype != dtype: + if not np.can_cast(value.dtype, dtype, 'safe'): + # TODO: in a later version, turn to an exception + stacklevel = info.context.get("stacklevel", 2) + warn_msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + name=name, + expected_dtype=dtype, + value_dtype=value.dtype + ) + warnings.warn(warn_msg, + category=FutureWarning, + stacklevel=stacklevel) value = value.astype(dtype) return value else: @@ -204,17 +228,20 @@ class CheckedSession(Session, BaseModel, metaclass=LArrayModelMetaclass): >>> class ModelVariables(CheckedSession): ... # --- declare variables with defined types --- - ... # Their values will be defined at runtime but must match the specified type. + ... # Their values will be defined at runtime but must match the + ... # specified type. ... birth_rate: Array ... births: Array ... # --- declare variables with a default value --- - ... # The default value will be used to set the variable if no value is passed at instantiation (see below). - ... # Their type is deduced from their default value and cannot be changed at runtime. + ... # The default value will be used to set the variable if no value + ... # is passed at instantiation (see below). + ... # Their type is deduced from their default value and cannot be + ... # changed at runtime. ... target_age = AGE[:2] >> '0-2' ... population = zeros((AGE, GENDER, TIME), dtype=int) ... 
# --- declare checked arrays --- - ... # The checked arrays have axes assumed to be "frozen", meaning they are - ... # constant all along the execution of the program. + ... # The checked arrays have axes assumed to be "frozen", meaning + ... # they are constant over the whole execution of the program. ... mortality_rate: CheckedArray((AGE, GENDER)) ... # For checked arrays, the default value can be given as a scalar. ... # Optionally, a dtype can be also specified (defaults to float). @@ -287,9 +314,11 @@ class CheckedSession(Session, BaseModel, metaclass=LArrayModelMetaclass): memory used: 1.89 Kb >>> # now let's try to do the same for deaths. >>> m.mortality_rate = full((AGE, GENDER), fill_value=sequence(AGE, inc=0.02)) - >>> # here the result of the multiplication of the 'population' array by the 'mortality_rate' array - >>> # is automatically converted to an integer array - >>> m.deaths = m.population * m.mortality_rate + >>> # the result of this multiplication is a float array + >>> deaths = m.population * m.mortality_rate + >>> # because m.deaths was declared as a CheckedArray with dtype=int, + >>> # we need to convert it to an integer array to avoid a warning + >>> m.deaths = (m.population * m.mortality_rate).astype(int) >>> print(m.deaths.info) # doctest: +SKIP 11 x 2 x 11 age [11]: 0 1 2 ... 
8 9 10 @@ -332,35 +361,50 @@ def __init__(self, *args, meta=None, **kwargs): # is given as only argument # TODO: refactor Session.load() to use a private function which returns the handler directly # so that we can get the items out of it and avoid this - input_data = dict(Session(*args, **kwargs)) + input_session = Session(*args, **kwargs) + input_data = input_session._objects + self._initialize_attributes(input_data, stacklevel=2) + + @classmethod + def _create_instance_from_dict(cls, data: dict, stacklevel=3) -> Session: + try: + inst = object.__new__(cls) + Session.__init__(inst) + inst._initialize_attributes(data, stacklevel=stacklevel) + return inst + except Exception as e: + print(f"failed to create CheckedSession ({e}) => return normal Session") + return Session(data) + + def _initialize_attributes(self, data: dict, stacklevel: int): + data = data.copy() # --- declared variables for name, field in self.__class__.model_fields.items(): - value = input_data.pop(name, NOT_LOADED) - + value = data.pop(name, NOT_LOADED) + skip_validation = False if value is NOT_LOADED: if field.default is PydanticUndefined: - warnings.warn(f"No value passed for the declared variable '{name}'", - stacklevel=2) # we actually use NOT_LOADED as the value - self.__setattr__(name, value, skip_frozen=True, skip_validation=True) + warnings.warn(f"No value passed for the declared variable '{name}'", + stacklevel=stacklevel + 1) + skip_validation = True else: - self.__setattr__(name, field.default, skip_frozen=True) + value = field.default + + if skip_validation: + super().__setattr__(name, value) else: - self.__setattr__(name, value, skip_frozen=True) + self.__setattr__(name, value, skip_frozen=True, stacklevel=stacklevel + 1) # --- undeclared variables - for name, value in input_data.items(): - self.__setattr__(name, value, skip_frozen=True, stacklevel=2) + for name, value in data.items(): + self.__setattr__(name, value, skip_frozen=True, stacklevel=stacklevel + 1) # code of the method 
below has been partly borrowed from pydantic.BaseModel.__setattr__() def _check_key_value(self, name: str, value: Any, skip_frozen: bool, - skip_validation: bool, stacklevel: int) -> Any: - if skip_validation: - return value - cls = self.__class__ cls_name = cls.__name__ model_config = cls.model_config @@ -382,6 +426,7 @@ def _check_key_value(self, name: str, field_info = model_fields[name] field_type = field_info.annotation if field_type is None: + assert False, f"None field_type for {name} ({value})" return value # Annotated[T, x] => field_info.metadata == (x,) @@ -399,9 +444,11 @@ def _check_key_value(self, name: str, # see https://docs.pydantic.dev/2.12/concepts/types/#custom-types # for more details about TypeAdapter - adapter = TypeAdapter(field_type, config=self.model_config) + adapter = TypeAdapter(field_type, config=model_config) try: - value = adapter.validate_python(value, context={'name': name}) + # pydantic machinery adds a few stack frames + validate_context = {'name': name, 'stacklevel': stacklevel + 3} + value = adapter.validate_python(value, context=validate_context) except ValidationError as e: error = e.errors()[0] msg = f"Error while assigning value to variable '{name}':\n" @@ -418,32 +465,29 @@ def _check_key_value(self, name: str, def _update_from_iterable(self, it): for k, v in it: - self.__setitem__(k, v, stacklevel=3) + self.__setattr__(k, v, stacklevel=3) - def __setitem__(self, key, value, skip_frozen=False, skip_validation=False, stacklevel=1): + def __setitem__(self, key, value): if key == 'meta': raise ValueError( "Sessions cannot contain any object named 'meta'. " "To modify the session metadata, use " "'session.meta = value' instead.") - value = self._check_key_value(key, value, skip_frozen, skip_validation, stacklevel=stacklevel + 1) - # we need to keep the attribute in sync - # TODO: I don't think this is specific to CheckedSession, so either - # we should do it in Session too or not do it here. 
- object.__setattr__(self, key, value) - self._objects[key] = value - - def __setattr__(self, key, value, skip_frozen=False, skip_validation=False, stacklevel=1): + value = self._check_key_value(key, value, + skip_frozen=False, + stacklevel=2) + super().__setitem__(key, value) + # object.__setattr__(self, key, value) + + def __setattr__(self, key, value, skip_frozen=False, stacklevel=1): if key == 'meta': object.__setattr__(self, key, value) return - - value = self._check_key_value(key, value, skip_frozen, skip_validation, stacklevel=stacklevel + 1) - # we need to keep the attribute in sync - # TODO: I don't think this is specific to CheckedSession, so either - # we should do it in Session too or not do it here. - object.__setattr__(self, key, value) - self._objects[key] = value + value = self._check_key_value(key, value, + skip_frozen, + stacklevel=stacklevel + 1) + super().__setattr__(key, value) + # object.__setattr__(self, key, value) def __getstate__(self) -> Dict[str, Any]: return {'__dict__': self.__dict__} diff --git a/larray/core/session.py b/larray/core/session.py index d81cb1e83..e383648da 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -310,8 +310,9 @@ def __delitem__(self, key) -> None: del self._objects[key] def __getattr__(self, key) -> Any: - if key in self._objects: - return self._objects[key] + data = object.__getattribute__(self, '_objects') + if key in data: + return data[key] else: raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{key}'") @@ -1013,44 +1014,48 @@ def __repr__(self) -> str: def __len__(self) -> int: return len(self._objects) + @classmethod + def _create_instance_from_dict(cls, data: dict, stacklevel=3) -> 'Session': + return cls(data) + # binary operations are dispatched element-wise to all arrays (we consider Session as an array-like) - def _binop(opname, arrays_only=True): + def _compute_binop(self, opname, other, cls_filter) -> dict: opfullname = f'__{opname}__' - - def 
opmethod(self, other) -> 'Session': - self_keys = set(self.keys()) - all_keys = list(self.keys()) - if not isinstance(other, Array) and hasattr(other, 'keys'): - all_keys += [n for n in other.keys() if n not in self_keys] - with np.errstate(call=_session_float_error_handler): - res = [] - for name in all_keys: - self_item = self.get(name, nan) - other_operand = other.get(name, nan) if hasattr(other, 'get') else other - if arrays_only and not isinstance(self_item, Array): - res_item = self_item - else: + inv_opname = f'__{inverseop(opname)}__' + all_keys = list(self.keys()) + if not isinstance(other, Array) and hasattr(other, 'keys'): + # we cannot simply use all_keys += self.keys() - other.keys() + # because we need to keep ordering + self_keys_set = set(all_keys) + all_keys.extend([k for k in other.keys() if k not in self_keys_set]) + + res = {} + with np.errstate(call=_session_float_error_handler): + for key in all_keys: + self_item = self.get(key, nan) + other_item = other.get(key, nan) if hasattr(other, 'get') else other + if cls_filter is not None and not isinstance(self_item, cls_filter): + res_item = self_item + else: + try: + res_item = getattr(self_item, opfullname)(other_item) + # TypeError for str arrays, ValueError for incompatible axes, ... + except Exception: + res_item = nan + if res_item is NotImplemented: try: - res_item = getattr(self_item, opfullname)(other_operand) + res_item = getattr(other_item, inv_opname)(self_item) # TypeError for str arrays, ValueError for incompatible axes, ... except Exception: res_item = nan - # this should only ever happen when self_array is a non Array (eg. nan) - if res_item is NotImplemented: - inv_opname = f'__{inverseop(opname)}__' - try: - res_item = getattr(other_operand, inv_opname)(self_item) - # TypeError for str arrays, ValueError for incompatible axes, ... - except Exception: - res_item = nan - res.append((name, res_item)) - try: - # XXX: print a warning? 
- ses = self.__class__(res) - except Exception: - ses = Session(res) - return ses - opmethod.__name__ = opfullname + res[key] = res_item + return res + + def _binop(opname, default_cls_filter=Array): + def opmethod(self, other, cls_filter=default_cls_filter) -> 'Session': + dict_res = self._compute_binop(opname, other, cls_filter) + return self._create_instance_from_dict(dict_res) + opmethod.__name__ = f'__{opname}__' return opmethod __add__ = _binop('add') @@ -1062,30 +1067,30 @@ def opmethod(self, other) -> 'Session': __truediv__ = _binop('truediv') __rtruediv__ = _binop('rtruediv') - __eq__ = _binop('eq', arrays_only=False) - __ne__ = _binop('ne', arrays_only=False) + __eq__ = _binop('eq', default_cls_filter=None) + __ne__ = _binop('ne', default_cls_filter=None) # element-wise method factory # unary operations are (also) dispatched element-wise to all arrays - def _unaryop(opname): + def _compute_unaryop(self, opname, cls_filter): opfullname = f'__{opname}__' + res = {} + with np.errstate(call=_session_float_error_handler): + for k, v in self.items(): + try: + res_item = getattr(v, opfullname)() + except Exception: + res_item = nan + res[k] = res_item + return res - def opmethod(self) -> 'Session': - with np.errstate(call=_session_float_error_handler): - res = [] - for k, v in self.items(): - try: - res_array = getattr(v, opfullname)() - except Exception: - res_array = nan - res.append((k, res_array)) - try: - # XXX: print a warning? 
- ses = self.__class__(res) - except Exception: - ses = Session(res) - return ses - opmethod.__name__ = opfullname + def _unaryop(opname): + # TODO: change default cls_filter to Array (using None for now + # to avoid changing behavior in a bugfix release) + def opmethod(self, cls_filter=None) -> 'Session': + dict_res = self._compute_unaryop(opname, cls_filter) + return self._create_instance_from_dict(dict_res) + opmethod.__name__ = f'__{opname}__' return opmethod __neg__ = _unaryop('neg') @@ -1763,4 +1768,4 @@ def arrays(depth=0, include_private=False, meta=None) -> Session: return Session({k: v for k, v in combined_vars if isinstance(v, Array)}, meta=meta) -_session_float_error_handler = float_error_handler_factory(4) +_session_float_error_handler = float_error_handler_factory(5) diff --git a/larray/tests/test_checked_session.py b/larray/tests/test_checked_session.py index 7251bdddc..8568e78d2 100644 --- a/larray/tests/test_checked_session.py +++ b/larray/tests/test_checked_session.py @@ -17,8 +17,7 @@ c, d, e, f, g, h, assert_seq_equal, session, test_getitem, test_getattr, test_add, test_element_equals, test_eq, test_ne) -from larray.core.checked import NotLoaded - +from larray.core.checked import NotLoaded, UNSAFE_CAST_WARNING_TEMPLATE # avoid flake8 errors meta = meta @@ -145,6 +144,14 @@ class TestInheritance(CheckedSessionExample): check_file=False): _ = CheckedSessionExample(a, a01, a2=a2, e=e, f=f, g=g, h=h) + # test using unsafe casting + warn_msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + name='h', expected_dtype='int64', value_dtype='float64' + ) + float_h = h * 1.5 + with must_warn(FutureWarning, warn_msg): + _ = CheckedSessionExample(a=a, a01=a01, a2=a2, e=e, f=f, g=g, h=float_h) + @needs_pytables def test_init_checkedsession_hdf(): @@ -222,30 +229,34 @@ def test_setattr_cs(checkedsession): cs.h = zeros_like(h) # trying to add an undeclared variable -> prints a warning message - with must_warn(UserWarning, msg=f"'i' is not declared in 
'{cs.__class__.__name__}'"): + msg = f"'i' is not declared in '{cs.__class__.__name__}'" + with must_warn(UserWarning, msg=msg): cs.i = ndtest((3, 3)) # trying to set a variable with an object of different type -> should fail # a) type given explicitly # -> Axis - with must_raise(TypeError, msg="Error while assigning value to variable 'a':\n" - "Input should be an instance of Axis. Got input value of type 'int'."): + msg = "Error while assigning value to variable 'a':\n" \ + "Input should be an instance of Axis. Got input value of type 'int'." + with must_raise(TypeError, msg=msg): cs.a = 0 # -> CheckedArray - # with must_raise(TypeError, msg="Error while assigning value to variable 'h':\n" - # "Input should be an instance of Array. Got input value of type 'ndarray'."): - with must_raise(TypeError, msg="Expected object of type 'Array' or a scalar for the variable 'h' but got " - "object of type 'ndarray'"): + msg = ("Expected object of type 'Array' or a scalar for the variable 'h' " + "but got object of type 'ndarray'") + with must_raise(TypeError, msg=msg): cs.h = h.data # b) type deduced from the given default value - with must_raise(TypeError, msg="Error while assigning value to variable 'b':\n" - "Input should be an instance of Axis. Got input value of type 'Array'."): + msg = ("Error while assigning value to variable 'b':\n" + "Input should be an instance of Axis. 
Got input value of type " + "'Array'.") + with must_raise(TypeError, msg=msg): cs.b = ndtest((3, 3)) # trying to set a CheckedArray variable using a scalar -> OK cs.h = 5 - # trying to set a CheckedArray variable using an array with axes in different order -> OK + # trying to set a CheckedArray variable using an array with axes in + # different order -> OK cs.h = h.transpose() assert cs.h.axes.names == h.axes.names @@ -253,12 +264,16 @@ def test_setattr_cs(checkedsession): cs.h = ndtest(a3) assert_array_nan_equal(cs.h['b0'], cs.h['b1']) - # trying to set a CheckedArray variable using an array with wrong axes -> should fail + # trying to set a CheckedArray variable using an array with wrong axes + # => should fail + # a) extra axis - with must_raise(ValueError, msg="Error while assigning value to variable 'h':\n" - "Array 'h' was declared with axes {a, b} but got array with axes {a, b, c} " - "(unexpected {c} axis)"): + msg = ("Error while assigning value to variable 'h':\n" + "Array 'h' was declared with axes {a, b} " + "but got array with axes {a, b, c} (unexpected {c} axis)") + with must_raise(ValueError, msg=msg): cs.h = ndtest((a3, b2, 'c=c0..c2')) + # b) incompatible axis msg = """\ Error while assigning value to variable 'h': @@ -269,6 +284,19 @@ def test_setattr_cs(checkedsession): with must_raise(ValueError, msg=msg): cs.h = h.append('a', 0, 'a4') + # set a CheckedArray variable with safe casting (from int to float) + int_f = f.astype(int) + cs.f = int_f + + # set a CheckedArray variable with unsafe casting (from float to int) + # => warning + warn_msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + name='h', expected_dtype='int64', value_dtype='float64' + ) + float_h = h * 1.5 + with must_warn(FutureWarning, msg=warn_msg): + cs.h = float_h + def test_add_cs(checkedsession): cs = checkedsession @@ -636,7 +664,14 @@ def test_sub_cs(checkedsession): axes = cs.h.axes cs.e = ndtest(axes) cs.g = ones_like(cs.h) - diff = cs - ones(axes) + + # an operation with a float 
array will create float values, hence the + # checked session will complain of unsafe casting (from float to int) + warn_msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + name='h', expected_dtype='int64', value_dtype='float64' + ) + with must_warn(FutureWarning, warn_msg): + diff = cs - ones(axes) assert isinstance(diff, session_cls) # --- non-array variables --- assert diff.b is b From c407cdf79bd2c6a2ef877e66e5bcd07a0359407c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 10:43:22 +0100 Subject: [PATCH 06/28] CLN: slightly nicer code flow --- larray/core/session.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/larray/core/session.py b/larray/core/session.py index e383648da..bd66db353 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -116,10 +116,12 @@ def meta(self) -> Metadata: @meta.setter def meta(self, meta) -> None: - if not isinstance(meta, (list, dict, Metadata)): - raise TypeError(f"Expected list of pairs or dict or Metadata object " + if isinstance(meta, (list, dict)): + meta = Metadata(meta) + elif not isinstance(meta, Metadata): + raise TypeError("Expected list of pairs or dict or Metadata object " f"instead of {type(meta).__name__}") - object.__setattr__(self, '_meta', meta if isinstance(meta, Metadata) else Metadata(meta)) + object.__setattr__(self, '_meta', meta) # XXX: behave like a dict and return keys instead? def __iter__(self) -> Iterable[Any]: From 4b43fde5986bd4641b888e64c92ae0573e85ae4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 10:48:10 +0100 Subject: [PATCH 07/28] CLN: wrap code I know... 
I flip-flopped again :) --- larray/core/array.py | 4 +++- larray/core/checked.py | 17 +++++++++++------ larray/core/session.py | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index af26f1ba8..c49a2972c 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -2526,8 +2526,10 @@ def __str__(self) -> str: elif len(self) == 0: return 'Array([])' else: - table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS], + table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], + edgeitems=_OPTIONS[DISPLAY_EDGEITEMS], _axes_display_names=True) + return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1, precision=_OPTIONS[DISPLAY_PRECISION]) __repr__ = __str__ diff --git a/larray/core/checked.py b/larray/core/checked.py index 6b0f2e9b0..12f154358 100644 --- a/larray/core/checked.py +++ b/larray/core/checked.py @@ -359,8 +359,9 @@ def __init__(self, *args, meta=None, **kwargs): # create an intermediate Session object to not call the __setattr__ # and __setitem__ overridden in the present class and in case a filepath # is given as only argument - # TODO: refactor Session.load() to use a private function which returns the handler directly - # so that we can get the items out of it and avoid this + # TODO: refactor Session.load() to use a private function which + # returns the handler directly so that we can get the items + # out of it and avoid this input_session = Session(*args, **kwargs) input_data = input_session._objects self._initialize_attributes(input_data, stacklevel=2) @@ -385,9 +386,10 @@ def _initialize_attributes(self, data: dict, stacklevel: int): if value is NOT_LOADED: if field.default is PydanticUndefined: # we actually use NOT_LOADED as the value - warnings.warn(f"No value passed for the declared variable '{name}'", + warnings.warn(f"No value passed for the declared " + f"variable '{name}'", stacklevel=stacklevel + 1) - 
skip_validation = True + skip_validation=True else: value = field.default @@ -409,8 +411,11 @@ def _check_key_value(self, name: str, cls_name = cls.__name__ model_config = cls.model_config if model_config['frozen'] and not skip_frozen: - raise TypeError(f"Cannot change the value of the variable '{name}' since '{cls_name}' " - f"is immutable and does not support item assignment") + raise TypeError( + f"Cannot change the value of the variable '{name}' since " + f"'{cls_name}' is immutable and does not support item " + f"assignment" + ) model_fields = cls.model_fields if name not in model_fields: diff --git a/larray/core/session.py b/larray/core/session.py index bd66db353..e68c88248 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -131,8 +131,8 @@ def add(self, *args, **kwargs) -> None: r""" Deprecated. Please use Session.update instead. """ - warnings.warn("Session.add() is deprecated. Please use Session.update() instead.", - FutureWarning, stacklevel=2) + warnings.warn("Session.add() is deprecated. Please use " + "Session.update() instead.", FutureWarning, stacklevel=2) self.update({arg.name: arg for arg in args}, **kwargs) def update(self, other=None, **kwargs) -> None: From 0189dabe5b92522d4e374f980f7c22ce5356ac51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 11:00:22 +0100 Subject: [PATCH 08/28] FIX: MultiIndex+str column with Pandas 3.0 (fixes #1166) --- doc/source/changes/version_0_35_1.rst.inc | 5 +++- larray/inout/pandas.py | 23 ++++++++++++++++ larray/tests/test_array.py | 32 +++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index ff30a757c..2a7593770 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -79,4 +79,7 @@ Miscellaneous improvements Fixes ^^^^^ -* fixed something (closes :issue:`1`). 
+* fixed loading data files with all the following conditions when using + Pandas >= 3.0: mixed type columns, some combination of labels in index + columns not being present in the data and a non-NaN fill_value + (closes :issue:`1166`). diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 52ab48144..726f735ae 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -88,6 +88,29 @@ def cartesian_product_df(df, sort_rows=False, sort_columns=False, np.array_equal(idx.values, prod_index.values) and np.array_equal(columns.values, prod_columns.values)): return df, combined_labels + import numbers + if (isinstance(fill_value, numbers.Number) and not np.isnan(fill_value) and + any(dt.kind == 'O' and dt.type is str for dt in df.dtypes)): + df = df.copy() + for col in df.columns: + dt = df[col].dtype + if dt.kind == 'O' and dt.type is str: + # TODO: we should really output this warning, but the user + # needs a way to silence it, which requires we implement + # in all user-facing functions calling this function + # directly or indirectly (from_series, from_frame, + # read_excel, read_csv, ...) a way to specify + # both a usecols argument (when the data has mixed type + # but the user only needs an homogeneously typed subset) + # and a dtype=object argument when the user does need + # mixed types (and want to silence the warning). + # warnings.warn("fill_value is not valid for all " + # "columns because it is a (non-NaN) number but " + # f"the '{col}' column has string dtype. 
That " + # "column will converted to object dtype to avoid " + # "errors but this may cause performance issues.", + # FutureWarning, stacklevel=3) + df[col] = df[col].astype(object) return df.reindex(index=prod_index, columns=prod_columns, fill_value=fill_value, **kwargs), combined_labels diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 32bf274a9..fa795f834 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -4656,6 +4656,38 @@ def test_from_frame(): res = from_frame(df, sort_rows=True, sort_columns=True) assert_larray_equal(res, expected) + # h) with a MultiIndex with some missing combination and a fill_value which + # is not valid for all columns. This pattern broke for Pandas >= 3 + # See issue #1166. In the user code, this issue came from, from_frame + # is used indirectly by read_excel and users are keeping only the + # numeric column (col2 in the test below) just after reading the array. + df = pd.DataFrame({ + 'idx1': [0, 0, 1], + 'idx2': ['a', 'b', 'a'], + 'col1': ['a', 'a', 'a'], + 'col2': [1, 1, 1] + }).set_index(['idx1', 'idx2']) + # TODO: we should have this warning, but before that we need more + # functionality to let users do something about it. + # See comment in inout/pandas.py:cartesian_product_df + # msg = ("fill_value is not valid for all columns because it is a " + # "(non-NaN) number but the 'col1' column has string dtype. 
That " + # "column will converted to object dtype to avoid errors but this " + # "may cause performance issues.") + # with must_warn(FutureWarning, msg=msg): + # from_frame(df, fill_value=-1) + res = from_frame(df, fill_value=-1) + expected_data = [[['a', 1], + ['a', 1]], + [['a', 1], + [-1, -1]]] + expected_axes = [ + # third axis is anonymous + Axis('idx1=0,1'), Axis('idx2=a,b'), Axis('col1,col2') + ] + expected = Array(expected_data, expected_axes, dtype=object) + assert_larray_equal(res, expected) + def test_asarray(): series = pd.Series([0, 1, 2], ['a0', 'a1', 'a2'], name='a') From 1b1e325de3433220e679905fb60af1a528f4d4c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 9 Mar 2026 17:50:23 +0100 Subject: [PATCH 09/28] TEST: better assert --- larray/tests/test_checked_session.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/larray/tests/test_checked_session.py b/larray/tests/test_checked_session.py index 8568e78d2..abe5b97d3 100644 --- a/larray/tests/test_checked_session.py +++ b/larray/tests/test_checked_session.py @@ -672,7 +672,10 @@ def test_sub_cs(checkedsession): ) with must_warn(FutureWarning, warn_msg): diff = cs - ones(axes) - assert isinstance(diff, session_cls) + assert isinstance(diff, session_cls), ( + f"diff session has type {type(diff).__name__} instead of " + f"{session_cls.__name__}" + ) # --- non-array variables --- assert diff.b is b assert diff.b024 is b024 From 745e7107b1e10e99678219c5f84cda0b53245134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Thu, 12 Mar 2026 12:07:48 +0100 Subject: [PATCH 10/28] DOC: use simpler/clearer way to initialize array in doctest --- larray/core/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larray/core/session.py b/larray/core/session.py index e68c88248..247366c20 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -1359,7 +1359,7 @@ def compact(self, display=False) -> 'Session': 
Examples -------- - >>> arr1 = sequence('b=b0..b2', ndtest(3), zeros_like(ndtest(3))) + >>> arr1 = sequence('b=b0..b2', initial=ndtest(3), inc=0) >>> arr1 a\b b0 b1 b2 a0 0 0 0 From cbc4cb011ea4c1cfe0b39b429487168d7c25536d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Thu, 12 Mar 2026 14:52:03 +0100 Subject: [PATCH 11/28] TEST: nicer assert message --- larray/tests/test_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index 183562b3b..19fa74b2f 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -422,7 +422,7 @@ def test_eq(session): # ====== same sessions ====== res = session == other_session res = to_boolean_array_eq(res) - assert res.axes == expected_res.axes + assert res.axes == expected_res.axes, f"{res.axes=} != {expected_res.axes=}" assert res.equals(expected_res) # ====== session with missing/extra items ====== From b84fb8313d141f9a9b4fba7beeaf58918263b76f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Thu, 12 Mar 2026 14:52:29 +0100 Subject: [PATCH 12/28] CLN: indent --- larray/util/misc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/larray/util/misc.py b/larray/util/misc.py index 6b41fef99..87f9539e4 100644 --- a/larray/util/misc.py +++ b/larray/util/misc.py @@ -513,7 +513,9 @@ def error_handler(error, flag): else: # for division by 0, we use a specific error handler *just* to set the correct stacklevel extra = '' - warnings.warn(f"{error} encountered during operation{extra}", RuntimeWarning, stacklevel=stacklevel) + warnings.warn(f"{error} encountered during operation{extra}", + RuntimeWarning, + stacklevel=stacklevel) return error_handler From 643b078ecf4001dc7ba07a0b1a5864fd20c37224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 13 Mar 2026 17:20:13 +0100 Subject: [PATCH 13/28] DOC: fixed return type hint --- 
larray/core/array.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index c49a2972c..75c82c984 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -2546,11 +2546,12 @@ def __contains__(self, key) -> bool: # XXX: dump as a 2D Array with row & col dims? def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is', - maxlines=-1, edgeitems=5, _axes_display_names=False) -> List[List[str]]: + maxlines=-1, edgeitems=5, _axes_display_names=False) -> List[List[Any]]: r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is', maxlines=-1, edgeitems=5) - Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel(). + Dump array as a 2D nested list. This is especially useful when writing + to an Excel sheet via open_excel(). Parameters ---------- From 20e3d2c36f31fd45e61bca9739a172a9123ebbaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 13 Mar 2026 17:23:34 +0100 Subject: [PATCH 14/28] FIX: remove Pandas 3.0rc workaround to save to H5 The issue was fixed before the final release (something like 3 hours after I reported it !) 
--- larray/core/array.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index 75c82c984..e9c7f11fc 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -7002,21 +7002,8 @@ def to_hdf(self, filepath, key) -> None: >>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP """ key = _translate_group_key_hdf(key) - def ensure_non_string_index(index): - if (isinstance(index, pd.MultiIndex) and - any(isinstance(dt, pd.StringDtype) for dt in index.dtypes)): - obj_levels = [pd.Index(level_labels, dtype=object) - for level_labels in index.levels] - index = pd.MultiIndex(obj_levels, index.codes, names=index.names) - return index - with LHDFStore(filepath) as store: df = self.to_frame() - # workaround for https://github.com/pandas-dev/pandas/issues/63412 - # Since we created the dataframe ourselves, we can just - # replace the index inplace - df.index = ensure_non_string_index(df.index) - df.columns = ensure_non_string_index(df.columns) store.put(key, df) attrs = store.get_storer(key).attrs From a643380afa04c6a3b2ff9d49c4ab3f062147df74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 13 Mar 2026 17:44:38 +0100 Subject: [PATCH 15/28] CLN: better indent and add comment --- larray/inout/hdf.py | 9 +++++++-- larray/inout/pandas.py | 22 ++++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 5d6e94eb6..47560603c 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -90,8 +90,13 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s if _type == 'Array': # cartesian product is not necessary if the array was written by LArray cartesian_prod = writer != 'LArray' - res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, - parse_header=False, cartesian_prod=cartesian_prod) + # we use df_asarray instead of from_frame to support 1D arrays + 
res = df_asarray(pd_obj, + sort_rows=sort_rows, + sort_columns=sort_columns, + fill_value=fill_value, + parse_header=False, + cartesian_prod=cartesian_prod) if _meta is not None: res.meta = _meta elif _type == 'Axis': diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 726f735ae..a8a254792 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -75,8 +75,11 @@ def product_index(idx, sort=False): return idx, labels -def cartesian_product_df(df, sort_rows=False, sort_columns=False, - fill_value=nan, **kwargs): +def cartesian_product_df(df, + sort_rows=False, + sort_columns=False, + fill_value=nan, + **kwargs): idx = df.index columns = df.columns prod_index, index_labels = product_index(idx, sort=sort_rows) @@ -188,8 +191,15 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra return Array(s.values, Axis(s.index.values, name), meta=meta) -def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False, - fill_value=nan, meta=None, cartesian_prod=True, **kwargs) -> Array: +def from_frame(df, + sort_rows=False, + sort_columns=False, + parse_header=False, + unfold_last_axis_name=False, + fill_value=nan, + meta=None, + cartesian_prod=True, + **kwargs) -> Array: r""" Convert Pandas DataFrame into Array. @@ -221,7 +231,7 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo cartesian_prod : bool, optional Whether to expand the dataframe to a cartesian product dataframe as needed by Array. This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already - well formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. + well-formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. Defaults to True. 
Returns @@ -395,7 +405,7 @@ def parse_axis_name(name): # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas # we also take the opportunity to change axes with empty name to real anonymous axes (name is None) to - # make them roundtrip correctly, based on the assumption that in an in-memory LArray an anonymouse axis is more + # make them roundtrip correctly, based on the assumption that in an in-memory LArray an anonymous axis is more # likely and useful than an Axis with an empty name. # TODO : find a more robust and elegant solution res = res.rename({axis: None for axis in res.axes if (isinstance(axis.name, str) From f5647795a30f2f2c638b8d9ae6fffcb1b55ac5f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 13 Mar 2026 17:52:38 +0100 Subject: [PATCH 16/28] FIX: fixed from_frame returning readonly Arrays on Pandas >= 3.0 (fixes #1170) This also indirectly affected most file reading functions and implements a new copy argument in from_frame to ensure data is not shared --- doc/source/changes/version_0_35_1.rst.inc | 3 + larray/inout/csv.py | 7 +- larray/inout/pandas.py | 100 +++++++++++++++++++--- larray/inout/stata.py | 5 +- larray/tests/test_array.py | 85 +++++++++++++++++- larray/util/misc.py | 24 ++++++ 6 files changed, 205 insertions(+), 19 deletions(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index 2a7593770..3d9ba934d 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -83,3 +83,6 @@ Fixes Pandas >= 3.0: mixed type columns, some combination of labels in index columns not being present in the data and a non-NaN fill_value (closes :issue:`1166`). + +* fixed most data reading functions returning readonly Arrays when using + Pandas >= 3.0 (closes :issue:`1170`). 
diff --git a/larray/inout/csv.py b/larray/inout/csv.py index e4acac94b..b8dbe6a7e 100644 --- a/larray/inout/csv.py +++ b/larray/inout/csv.py @@ -229,7 +229,12 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse df.index.names = combined_axes_names.split(headersep) raw = False - return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide) + return df_asarray(df, + sort_rows=sort_rows, + sort_columns=sort_columns, + fill_value=fill_value, + raw=raw, + wide=wide) def read_tsv(filepath_or_buffer, **kwargs) -> Array: diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index a8a254792..f2cc936b0 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -118,7 +118,12 @@ def cartesian_product_df(df, fill_value=fill_value, **kwargs), combined_labels -def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Array: +def from_series(s, + sort_rows=False, + fill_value=nan, + meta=None, + copy=True, + **kwargs) -> Array: r""" Convert Pandas Series into Array. @@ -134,6 +139,11 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra meta : list of pairs or dict or Metadata, optional Metadata (title, description, author, creation_date, ...) associated with the array. Keys must be strings. Values must be of type string, int, float, date, time or datetime. + copy : bool, optional + Whether to copy the data from the Series. Defaults to True. + copy=False does not guarantee that no copy will be made, only that a + copy is only done when necessary. If the resulting array shares the same + data buffer than the original series, it will be read-only. 
Returns ------- @@ -182,13 +192,30 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs) -> Arra else: index = labels[0] columns = labels[-1] + # no need to use copy=False since Pandas implemented copy-on-write + # and will only copy if necessary df = df.reindex(index=index, columns=columns, fill_value=fill_value) - return from_frame(df, sort_rows=sort_rows, sort_columns=sort_rows, fill_value=fill_value, meta=meta, **kwargs) + # copy=False because unstack above already copied the data + res = from_frame(df, + sort_rows=sort_rows, + sort_columns=sort_rows, + fill_value=fill_value, + meta=meta, + copy=False, + **kwargs) + res.data.flags.writeable = True + return res else: name = decode(s.name, 'utf8') if s.name is not None else decode(s.index.name, 'utf8') if sort_rows: s = s.sort_index() - return Array(s.values, Axis(s.index.values, name), meta=meta) + # sort_index copies the data, no need to copy it again + values = s.to_numpy(copy=False) + values.flags.writeable = True + else: + values = s.to_numpy(copy=copy) + labels = s.index.to_numpy(copy=copy) + return Array(values, Axis(labels, name), meta=meta) def from_frame(df, @@ -199,6 +226,7 @@ def from_frame(df, fill_value=nan, meta=None, cartesian_prod=True, + copy=True, **kwargs) -> Array: r""" Convert Pandas DataFrame into Array. @@ -233,6 +261,11 @@ def from_frame(df, This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already well-formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. Defaults to True. + copy : bool, optional + Whether to copy the data from the DataFrame. Defaults to True. + copy=False does not guarantee that no copy will be made, only that a + copy is only done when necessary. If the resulting array shares the same + data buffer than the original series, it will be read-only. 
Returns ------- @@ -275,9 +308,21 @@ def from_frame(df, else: axes_names += df.columns.names + make_writable = False if cartesian_prod: - df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns, - fill_value=fill_value, **kwargs) + orig_df = df + df, axes_labels = ( + cartesian_product_df(df, + sort_rows=sort_rows, + sort_columns=sort_columns, + fill_value=fill_value, + **kwargs) + ) + # we already copied the data so we can avoid copying it again when + # converting to numpy array + if copy and df is not orig_df: + copy = False + make_writable = True else: if sort_rows or sort_columns: raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. ' @@ -295,7 +340,9 @@ def from_frame(df, # TODO: use zip(..., strict=True) instead when we drop support for Python 3.9 assert len(axes_labels) == len(axes_names) axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]) - data = df.values.reshape(axes.shape) + data = df.to_numpy(copy=copy).reshape(axes.shape) + if make_writable: + data.flags.writeable = True return Array(data, axes, meta=meta) @@ -305,7 +352,7 @@ def set_dataframe_index_by_position(df, index_col_indices): This is necessary to support creating an index from columns without a name or with duplicate names. - Return a new Dataframe + Return a new Dataframe (no shared data) """ if not isinstance(index_col_indices, list): index_col_indices = [index_col_indices] @@ -319,11 +366,27 @@ def set_dataframe_index_by_position(df, index_col_indices): return df -def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=True, wide=True, cartesian_prod=True, +def df_asarray(df, + sort_rows=False, + sort_columns=False, + raw=False, + parse_header=True, + wide=True, + cartesian_prod=True, **kwargs) -> Array: r""" Prepare Pandas DataFrame and then convert it into Array. 
+ Warning + ------- + This function actively tries to share data with the input dataframe + (equivalent to copy=False in from_frame) and will always return a writable + array (whether the backing array is shared with the input dataframe or not). + + This is not a problem because we only use this function internally with + dataframes we created ourselves (by reading/converting files or other + data structures). + Parameters ---------- df : Pandas DataFrame @@ -349,7 +412,7 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= cartesian_prod : bool, optional Whether to expand the dataframe to a cartesian product dataframe as needed by Array. This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already - well formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. + well-formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. Defaults to True. Returns @@ -379,7 +442,10 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= df = set_dataframe_index_by_position(df, list(range(len(df.columns) - 1))) series = df.iloc[:, -1] series.name = df.index.name - return from_series(series, sort_rows=sort_columns, **kwargs) + # copy=False because set_dataframe_index_by_position copies data + res = from_series(series, sort_rows=sort_columns, copy=False, **kwargs) + res.data.flags.writeable = True + return res # handle 1D arrays if len(df) == 1 and (pd.isnull(df.index.values[0]) @@ -390,7 +456,7 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header= series.name = df.index.name if sort_rows: raise ValueError('sort_rows=True is not valid for 1D arrays. 
Please use sort_columns instead.') - res = from_series(series, sort_rows=sort_columns) + res = from_series(series, sort_rows=sort_columns, copy=False) else: def parse_axis_name(name): if isinstance(name, bytes): @@ -400,8 +466,16 @@ def parse_axis_name(name): return name axes_names = [parse_axis_name(name) for name in df.index.names] unfold_last_axis_name = isinstance(axes_names[-1], str) and '\\' in axes_names[-1] - res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header, - unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs) + res = from_frame(df, + sort_rows=sort_rows, + sort_columns=sort_columns, + parse_header=parse_header, + unfold_last_axis_name=unfold_last_axis_name, + cartesian_prod=cartesian_prod, + copy=False, + **kwargs) + + res.data.flags.writeable = True # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas # we also take the opportunity to change axes with empty name to real anonymous axes (name is None) to diff --git a/larray/inout/stata.py b/larray/inout/stata.py index 6a3f06f6f..18c1c8c4b 100644 --- a/larray/inout/stata.py +++ b/larray/inout/stata.py @@ -49,4 +49,7 @@ def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns 2 FR F """ df = pd.read_stata(filepath_or_buffer, index_col=index_col, **kwargs) - return from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns) + return from_frame(df, + sort_rows=sort_rows, + sort_columns=sort_columns, + copy=False) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index fa795f834..adb1f7a29 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -26,7 +26,7 @@ from larray.core.axis import ( _to_ticks, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg ) -from larray.util.misc import LHDFStore +from larray.util.misc import LHDFStore, PANDAS30_OR_LATER # avoid flake8 errors meta = meta @@ -3707,6 +3707,7 @@ def 
test_hdf_roundtrip(tmp_path, meta): assert_larray_equal(res, arr) assert res.meta == arr.meta + assert res.data.flags.writeable # issue 72: int-like strings should not be parsed (should round-trip correctly) fpath = tmp_path / 'issue72.h5' @@ -3718,6 +3719,7 @@ def test_hdf_roundtrip(tmp_path, meta): axis = res.axes[0] assert axis.name == 'axis' assert list(axis.labels) == ['10', '20'] + assert res.data.flags.writeable # passing group as key to to_hdf a3 = ndtest((4, 3, 4)) @@ -3776,22 +3778,27 @@ def test_from_string(): res = from_string(""" c c0 c1 \t 0 1""") assert_larray_equal(res, expected) + assert res.data.flags.writeable res = from_string(r""" c c0 c1 nan 0 1""") assert_larray_equal(res, expected) + assert res.data.flags.writeable res = from_string(r""" c c0 c1 NaN 0 1""") assert_larray_equal(res, expected) + assert res.data.flags.writeable def test_read_csv(): res = read_csv(inputpath('test1d.csv')) assert_larray_equal(res, io_1d) + assert res.data.flags.writeable res = read_csv(inputpath('test2d.csv')) assert_larray_equal(res, io_2d) + assert res.data.flags.writeable res = read_csv(inputpath('test3d.csv')) assert_larray_equal(res, io_3d) @@ -3807,12 +3814,14 @@ def test_read_csv(): assert arr.shape == (3,) assert arr.axes.names == ['time'] assert list(arr.data) == [3722, 3395, 3347] + assert res.data.flags.writeable arr = read_csv(inputpath('test5d_liam2.csv'), dialect='liam2') assert arr.ndim == 5 assert arr.shape == (2, 5, 2, 2, 3) assert arr.axes.names == ['arr', 'age', 'sex', 'nat', 'time'] assert list(arr[X.arr[1], 0, 'F', X.nat[1], :].data) == [3722, 3395, 3347] + assert res.data.flags.writeable # missing values res = read_csv(inputpath('testmissing_values.csv')) @@ -3831,9 +3840,11 @@ def test_read_csv(): ################# res = read_csv(inputpath('test1d_narrow.csv'), wide=False) assert_larray_equal(res, io_1d) + assert res.data.flags.writeable res = read_csv(inputpath('test2d_narrow.csv'), wide=False) assert_larray_equal(res, io_2d) + assert 
res.data.flags.writeable res = read_csv(inputpath('test3d_narrow.csv'), wide=False) assert_larray_equal(res, io_3d) @@ -3854,15 +3865,18 @@ def test_read_eurostat(): assert arr.axes.names == ['arr', 'age', 'sex', 'nat', 'time'] # FIXME: integer labels should be parsed as such assert list(arr[X.arr['1'], '0', 'F', X.nat['1'], :].data) == [3722, 3395, 3347] + assert arr.data.flags.writeable @needs_xlwings def test_read_excel_xlwings(): arr = read_excel(inputpath('test.xlsx'), '1d') assert_larray_equal(arr, io_1d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test.xlsx'), '2d') assert_larray_equal(arr, io_2d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test.xlsx'), '2d_classic') assert_larray_equal(arr, ndtest("a=a0..a2; b0..b2")) @@ -3963,9 +3977,11 @@ def test_read_excel_xlwings(): def test_read_excel_pandas(): arr = read_excel(inputpath('test.xlsx'), '1d', engine='openpyxl') assert_larray_equal(arr, io_1d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test.xlsx'), '2d', engine='openpyxl') assert_larray_equal(arr, io_2d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test.xlsx'), '2d', nb_axes=2, engine='openpyxl') assert_larray_equal(arr, io_2d) @@ -4006,12 +4022,15 @@ def test_read_excel_pandas(): ################# arr = read_excel(inputpath('test_narrow.xlsx'), '1d', wide=False, engine='openpyxl') assert_larray_equal(arr, io_1d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test_narrow.xlsx'), '2d', wide=False, engine='openpyxl') assert_larray_equal(arr, io_2d) + assert arr.data.flags.writeable arr = read_excel(inputpath('test_narrow.xlsx'), '3d', wide=False, engine='openpyxl') assert_larray_equal(arr, io_3d) + assert arr.data.flags.writeable # missing rows + fill_value argument arr = read_excel(inputpath('test_narrow.xlsx'), 'missing_values', @@ -4019,10 +4038,12 @@ def test_read_excel_pandas(): expected = io_narrow_missing_values.copy() expected[isnan(expected)] = 42 
assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # unsorted values arr = read_excel(inputpath('test_narrow.xlsx'), 'unsorted', wide=False, engine='openpyxl') assert_larray_equal(arr, io_unsorted) + assert arr.data.flags.writeable def test_from_lists(): @@ -4035,6 +4056,7 @@ def test_from_lists(): ['a1', 'b0', 6, 7, 8], # noqa: E241 ['a1', 'b1', 9, 10, 11]]) # noqa: E241 assert_larray_equal(res, expected) + assert res.data.flags.writeable # simple (using dump). This should be the same test as above. # We just make sure dump() and from_lists() round-trip correctly. @@ -4114,13 +4136,35 @@ def test_from_series(): # Series with Index as index expected = ndtest(3) s = pd.Series([0, 1, 2], index=pd.Index(['a0', 'a1', 'a2'], name='a')) - assert_larray_equal(from_series(s), expected) + res = from_series(s) + assert_larray_equal(res, expected) + assert res.data.flags.writeable + + # Without sorting we can share the data buffer, but in that case + # the result must be read-only with Pandas 3.0+ + s2 = s.copy() + res = from_series(s2, copy=False) + assert_larray_equal(res, expected) + if PANDAS30_OR_LATER: + assert not res.data.flags.writeable + res.data.flags.writeable = True + # I don't know how to test explicitly that the buffers are shared because + # the underlying buffer from the array "is not" the same, that is: + # s.to_numpy(copy=False).data is not s.to_numpy(copy=False).data + # so I use this indirect test instead: modifying an item of the array + # modifies the series + res['a1'] = 42 + assert s2['a1'] == 42 s = pd.Series([2, 0, 1], index=pd.Index(['a2', 'a0', 'a1'], name='a')) - assert_larray_equal(from_series(s, sort_rows=True), expected) + res = from_series(s, sort_rows=True) + assert_larray_equal(res, expected) + assert res.data.flags.writeable expected = ndtest(3)[['a2', 'a0', 'a1']] - assert_larray_equal(from_series(s), expected) + res = from_series(s) + assert_larray_equal(res, expected) + assert res.data.flags.writeable # Series with 
MultiIndex as index a = Axis('a=0..3') @@ -4134,14 +4178,17 @@ def test_from_series(): res = from_series(s) assert_larray_equal(res, expected) + assert res.data.flags.writeable res = from_series(s, sort_rows=True) assert_larray_equal(res, expected.sort_labels()) + assert res.data.flags.writeable expected[0, 'F'] = -1 s = s.reset_index().drop([3, 4, 5]).set_index(['a', 'gender', 'time'])[0] res = from_series(s, fill_value=-1) assert_larray_equal(res, expected) + assert res.data.flags.writeable def test_to_frame(): @@ -4204,6 +4251,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, 1)), [axis_index, axis_columns]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # anonymous columns # input dataframe: @@ -4224,6 +4272,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # anonymous columns/non string row axis name # input dataframe: @@ -4242,6 +4291,7 @@ def test_from_frame(): assert res.shape == (1, 1) assert res.axes.names == [0, None] assert_larray_equal(res, expected) + assert arr.data.flags.writeable # anonymous index # input dataframe: @@ -4261,6 +4311,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, 1)), [axis_index, axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # index and columns with name # input dataframe: @@ -4281,6 +4332,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, 1)), [axis_index.rename('index'), axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # 2) data = vector # ================ @@ -4299,6 +4351,7 @@ def test_from_frame(): assert df.columns.name is None assert list(df.index.values) == 
indexes assert list(df.columns.values) == columns + assert arr.data.flags.writeable # anonymous indexes/columns # input dataframe: @@ -4317,6 +4370,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, size)), [axis_index, axis_columns]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # anonymous columns # input dataframe: @@ -4337,6 +4391,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, size)), [axis_index.rename('index'), axis_columns]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # anonymous index # input dataframe: @@ -4356,6 +4411,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, size)), [axis_index, axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # index and columns with name # input dataframe: @@ -4376,6 +4432,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data.reshape((1, size)), [axis_index.rename('index'), axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # 2B) data = vertical vector (N x 1) # ================================== @@ -4390,6 +4447,7 @@ def test_from_frame(): assert df.columns.name is None assert list(df.index.values) == indexes assert list(df.columns.values) == columns + assert arr.data.flags.writeable # anonymous indexes/columns # input dataframe: @@ -4412,6 +4470,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data, [axis_index, axis_columns]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # anonymous columns # input dataframe: @@ -4436,6 +4495,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data, [axis_index.rename('index'), axis_columns]) assert_larray_equal(arr, expected) + assert 
arr.data.flags.writeable # anonymous index # input dataframe: @@ -4459,6 +4519,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data, [axis_index, axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # index and columns with name # input dataframe: @@ -4482,6 +4543,7 @@ def test_from_frame(): assert list(arr.axes.labels[1]) == columns expected = Array(data, [axis_index, axis_columns.rename('columns')]) assert_larray_equal(arr, expected) + assert arr.data.flags.writeable # 3) 3D array # =========== @@ -4509,6 +4571,7 @@ def test_from_frame(): assert arr.shape == (4, 2, 3) assert arr.axes.names == ['a', 'c', 'time'] assert list(arr[0, 'c1', :].data) == [3722, 3395, 3347] + assert arr.data.flags.writeable # 3B) Dataframe with columns.name containing \ # ============================================ @@ -4532,6 +4595,7 @@ def test_from_frame(): assert arr.shape == (4, 2, 3) assert arr.axes.names == ['a', 'c', 'time'] assert_nparray_equal(arr[0, 'c1', :].data, np.array([3722, 3395, 3347])) + assert arr.data.flags.writeable # 3C) Dataframe with no axe names (names are None) # =============================== @@ -4539,6 +4603,7 @@ def test_from_frame(): df_no_names = arr_no_names.df res = from_frame(df_no_names) assert_larray_equal(res, arr_no_names) + assert res.data.flags.writeable # 3D) Dataframe with empty axe names (names are '') # ================================== @@ -4547,6 +4612,7 @@ def test_from_frame(): df_empty_names = arr_empty_names.df res = from_frame(df_empty_names) assert_larray_equal(res, arr_empty_names) + assert res.data.flags.writeable # 4) test sort_rows and sort_columns arguments # ============================================ @@ -4564,6 +4630,7 @@ def test_from_frame(): expected = expected.sort_labels() res = from_frame(df, sort_rows=True, sort_columns=True) assert_larray_equal(res, expected) + assert res.data.flags.writeable # df.index is a MultiIndex instance 
expected = ndtest((a, gender, time)) @@ -4573,6 +4640,7 @@ def test_from_frame(): res = from_frame(df, sort_rows=True, sort_columns=True) assert_larray_equal(res, expected.sort_labels()) + assert res.data.flags.writeable # 5) test fill_value # ================== @@ -4580,6 +4648,7 @@ def test_from_frame(): df = df.reset_index().drop([3]).set_index(['a', 'gender']) res = from_frame(df, fill_value=-1) assert_larray_equal(res, expected) + assert res.data.flags.writeable # 6) with a multi-index in columns # ================================ @@ -4589,24 +4658,28 @@ def test_from_frame(): df = arr.to_frame(ncolaxes=2) res = from_frame(df) assert_larray_equal(res, arr) + assert res.data.flags.writeable # b) with duplicated axis names arr = ndtest("a=a0,a1;a=b0,b1;a=c0,c1;a=d0,d1") df = arr.to_frame(ncolaxes=2) res = from_frame(df) assert_larray_equal(res, arr) + assert res.data.flags.writeable # c) with duplicated axes names and labels arr = ndtest("a=a0,a1;a=a0,a1;a=a0,a1;a=a0,a1") df = arr.to_frame(ncolaxes=2) res = from_frame(df) assert_larray_equal(res, arr) + assert res.data.flags.writeable # d) with unsorted labels arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0") df = arr.to_frame(ncolaxes=2) res = from_frame(df) assert_larray_equal(res, arr) + assert res.data.flags.writeable # e) with sorting of unsorted column labels arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0") @@ -4623,6 +4696,7 @@ def test_from_frame(): a0 b0 c1 13 12""") res = from_frame(df, sort_columns=True) assert_larray_equal(res, expected) + assert res.data.flags.writeable # f) with sorting of unsorted row labels arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0") @@ -4639,6 +4713,7 @@ def test_from_frame(): a1 b1 c0 2 3""") res = from_frame(df, sort_rows=True) assert_larray_equal(res, expected) + assert res.data.flags.writeable # g) with sorting of all unsorted labels arr = ndtest("a=a1,a0;b=b1,b0;c=c1,c0;d=d1,d0") @@ -4655,6 +4730,7 @@ def test_from_frame(): a1 b1 c1 1 0""") res = from_frame(df, 
sort_rows=True, sort_columns=True)
     assert_larray_equal(res, expected)
+    assert res.data.flags.writeable
 
     # h) with a MultiIndex with some missing combination and a fill_value which
     #    is not valid for all columns. This pattern broke for Pandas >= 3
@@ -4687,6 +4763,7 @@ def test_from_frame():
     ]
     expected = Array(expected_data, expected_axes, dtype=object)
     assert_larray_equal(res, expected)
+    assert res.data.flags.writeable
 
 
 def test_asarray():
diff --git a/larray/util/misc.py b/larray/util/misc.py
index 87f9539e4..690becc6f 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -6,6 +6,7 @@
 import math
 import itertools
 import os
+import re
 import sys
 import operator
 import warnings
@@ -1134,4 +1135,27 @@ def find_names(obj, depth=0):
     return sorted(names)
 
 
+SUFFIX_PATTERN = re.compile(r'[a-zA-Z_\-]+[0-9]?')
+
+def version_str_to_tuple(s: str):
+    """
+    Transform a version string into a tuple of integers, ignoring any suffixes.
+
+    Examples
+    --------
+    >>> version_str_to_tuple('3.1.4')
+    (3, 1, 4)
+    >>> version_str_to_tuple('3.1.4-dev')
+    (3, 1, 4)
+    >>> version_str_to_tuple('3.1.4-b1')
+    (3, 1, 4)
+    >>> version_str_to_tuple('3.0.0rc0')
+    (3, 0, 0)
+    """
+    parts = s.split('.')
+    cleaned_parts = [SUFFIX_PATTERN.sub('', part) for part in parts]
+    return tuple(int(part) for part in cleaned_parts if part)
+
+
 PY312_OR_LATER = sys.version_info[:2] >= (3, 12)
+PANDAS30_OR_LATER = version_str_to_tuple(pd.__version__) >= (3, 0)

From ae1634d51acaf1278003ae309179b13d42e75e94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?=
Date: Fri, 13 Mar 2026 18:10:29 +0100
Subject: [PATCH 17/28] DOC: better assert message

---
 larray/inout/pandas.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index f2cc936b0..d6259b39f 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -338,7 +338,10 @@ def from_frame(df,
         axes_labels[i] = [parse(cell) for cell in axes_labels[i]]
 
     # TODO: use
zip(..., strict=True) instead when we drop support for Python 3.9 - assert len(axes_labels) == len(axes_names) + assert len(axes_labels) == len(axes_names), \ + (f"number of axes labels and axes names do not match: " + f"{len(axes_labels)} vs {len(axes_names)}:\n" + f"{axes_labels}\n\nvs\n\n{axes_names}") axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]) data = df.to_numpy(copy=copy).reshape(axes.shape) if make_writable: From 0e03fa18bb593f9ffc1ff06c8efb182f6399a6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Fri, 13 Mar 2026 18:11:08 +0100 Subject: [PATCH 18/28] TEST: better assert message when array shapes differ --- larray/tests/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/larray/tests/common.py b/larray/tests/common.py index 4b0dfb196..ffa3536c6 100644 --- a/larray/tests/common.py +++ b/larray/tests/common.py @@ -99,7 +99,8 @@ def assert_equal(a, b): else: assert isinstance(a, np.ndarray) and isinstance(b, np.ndarray) if check_shape and a.shape != b.shape: - raise AssertionError(f"shapes differ: {a.shape} != {b.shape}") + raise AssertionError(f"shapes differ: {a.shape} != {b.shape}\n" + f"got:\n\n{a}\n\nexpected:\n\n{b}") equal = test_func(a, b) if not equal.all(): notequal = ~equal From 601db5e94cb84417c675a090c3df720cb9785fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 16 Mar 2026 12:47:42 +0100 Subject: [PATCH 19/28] CLN: removed obsolete TODO and remove nonsensical comment --- larray/core/array.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index e9c7f11fc..fd7c8bc21 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -2243,10 +2243,6 @@ def __getitem__(self, key, collapse_slices=False, translate_key=True, points=Fal return res_data def __setitem__(self, key, value, collapse_slices=True, translate_key=True, 
points=False) -> None: - # TODO: if key or value has more axes than self, we could use - # total_axes = self.axes + key.axes + value.axes - # expanded = self.expand(total_axes) - # data = np.asarray(expanded.data) raw_broadcasted_key, target_axes, _ = \ self.axes._key_to_raw_and_axes(key, collapse_slices, translate_key, points, wildcard=True) if isinstance(value, Array): @@ -2255,6 +2251,16 @@ def __setitem__(self, key, value, collapse_slices=True, translate_key=True, poin value = value.broadcast_with(target_axes, check_compatible=True) else: target_axes = [] + + # Even though we could theoretically allow more axes than self in value + # (in key, it already does the right thing) by expanding the data to + # the axes union, I don't think it is a good idea because it seems to + # me more likely to do this by mistake than intentionally. Even though: + # orig_arr = arr; arr[key] = value; orig_arr is arr + # would still be True, + # orig_data = arr.data; arr[key] = value; orig_data is arr.data + # would not. + # replace incomprehensible error message "could not broadcast input array from shape XX into shape YY" # for users by "incompatible axes" extra_axes = [axis for axis in value.axes - target_axes if len(axis) > 1] @@ -2267,16 +2273,6 @@ def __setitem__(self, key, value, collapse_slices=True, translate_key=True, poin value = value.data self.data[raw_broadcasted_key] = value - # concerning keys this can make sense in several cases: - # single bool Array key with extra axes. - # tuple of bool Array keys (eg one for each axis). each could have extra axes. Common axes between keys are - # not a problem, we can simply "and" them. Though we should avoid explicitly "and"ing them if there is no - # common axis because that is less efficient than the implicit "and" that is done by numpy __getitem__ (and - # the fact we need to combine dimensions when any key has more than 1 dim). 
- - # the bool value represents whether the axis label is taken or not if any bool key (part) has more than one - # axis, we get combined dimensions out of it. - def set(self, value, **kwargs) -> None: r""" Set a subset of array to value. From 31df0898ec09489430dc57e4cabe826af0512083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 16 Mar 2026 16:39:17 +0100 Subject: [PATCH 20/28] FIX: sequence(ax, inc=const_or_arr_whout_ax, mult=const_or_arr_whout_ax) also added missing tests for that case and avoid a warning --- doc/source/changes/version_0_35_1.rst.inc | 2 ++ larray/core/array.py | 39 ++++++++++---------- larray/tests/test_array.py | 44 +++++++++++++++++++++++ 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index 3d9ba934d..0204c0990 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -86,3 +86,5 @@ Fixes * fixed most data reading functions returning readonly Arrays when using Pandas >= 3.0 (closes :issue:`1170`). + +* fixed `sequence()` when both `inc` and `mult` are defined. 
diff --git a/larray/core/array.py b/larray/core/array.py index fd7c8bc21..6ed9aafb1 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -8930,17 +8930,17 @@ def strip_axes(col): def has_axis(a, axis): return isinstance(a, Array) and axis in a.axes - def array_or_full(a, axis, initial): - dt = common_dtype((a, initial)) - r = empty(strip_axes(initial) | strip_axes(a) | axis, dtype=dt) - r[axis.i[0]] = initial - if isinstance(a, Array) and axis in a.axes: - # not using axis.i[1:] because a could have less ticks - # on axis than axis - r[axis.i[1:]] = a[axis[axis.labels[1]:]] + def array_or_full(value, seq_axis, initial): + dt = common_dtype((value, initial)) + res = empty(strip_axes(initial) | strip_axes(value) | seq_axis, dtype=dt) + res[seq_axis.i[0]] = initial + if isinstance(value, Array) and seq_axis in value.axes: + # not using seq_axis.i[1:] on the right side because value could + # have less ticks on its axis than seq_axis + res[seq_axis.i[1:]] = value[seq_axis[seq_axis.labels[1]:]] else: - r[axis.i[1:]] = a - return r + res[seq_axis.i[1:]] = value + return res if axes is None: # we need to remove axis if present, because it might be incompatible @@ -8970,14 +8970,14 @@ def array_or_full(a, axis, initial): # inc only (array) elif np.isscalar(mult) and mult == 1: inc_array = array_or_full(inc, axis, initial) - # TODO: when axis is None, this is inefficient (inc_array.cumsum() is the result) + # TODO: when axes is None, this is inefficient (inc_array.cumsum() is the result) res[axis.i[0]] = initial res[axis.i[1:]] = inc_array.cumsum(axis)[axis.i[1:]] # mult only (scalar or array) elif np.isscalar(inc) and inc == 0: mult_array = array_or_full(mult, axis, initial) res[axis.i[0]] = initial - # TODO: when axis is None, this is inefficient (mult_array.cumprod() is the result) + # TODO: when axes is None, this is inefficient (mult_array.cumprod() is the result) res[axis.i[1:]] = mult_array.cumprod(axis)[axis.i[1:]] # both inc and mult defined but constant 
(scalars or axis not present) elif not has_axis(inc, axis) and not has_axis(mult, axis): @@ -9001,16 +9001,19 @@ def array_or_full(a, axis, initial): # a[i] = initial * cum_mult[i] + inc * cum_mult[i - 1] - # the case "mult == 1" was already handled above but we still need to handle the case where mult is - # an array and *one cell* == 1 - res_where_not_1 = ((1 - cum_mult) / (1 - mult)) * inc + initial * cum_mult + # the case "scalar_mult == 1" is already handled above but we still + # need to handle the case where mult is an array and *some* cells are 1 + res_where_not_1 = ((1 - cum_mult).divnot0(1 - mult)) * inc + initial * cum_mult if isinstance(mult, Array) and any(mult == 1): from larray.core.ufuncs import where - res_where_1 = Array(np.linspace(initial, initial + inc * (len(axis) - 1), len(axis)), axis) - res[axis.i[1:]] = where(mult == 1, res_where_1, res_where_not_1) + # equivalent to: + # res_where_1 = sequence(axis, initial=initial, inc=inc)[axis.i[1:]] + res_where_1 = sequence(axis.i[1:], initial=initial + inc, inc=inc) + res_not_initial = where(mult == 1, res_where_1, res_where_not_1) + res[axis.i[1:]] = res_not_initial.astype(res_dtype, copy=False) else: - res[axis.i[1:]] = res_where_not_1 + res[axis.i[1:]] = res_where_not_1.astype(res_dtype, copy=False) else: assert has_axis(inc, axis) or has_axis(mult, axis) # This case is more complicated to vectorize. 
It seems diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index adb1f7a29..9385193a3 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -3165,6 +3165,50 @@ def test_sequence(): a1 2.0 4.0 4.0""") assert_larray_equal(res, expected) + # both inc and mult defined but constant (scalars) + initial = Array([1, 2], 'a=a0,a1') + res = sequence('b=b0..b2', initial=initial, mult=2, inc=1) + expected = from_string(r""" + a\b b0 b1 b2 + a0 1 3 7 + a1 2 5 11""") + assert_larray_equal(res, expected) + + # both inc and mult defined but "constant" array (sequence axis not present) + initial = Array([1, 2], 'a=a0,a1') + mult = Array([2, 3], 'a=a0,a1') + inc = Array([1, 2], 'a=a0,a1') + res = sequence('b=b0..b2', initial=initial, mult=mult, inc=inc) + # 1 (res_a0_b0) = 1 (initial_a0) + # 3 (res_a0_b1) = 1 (res_a0_b0) * 2 (mult_a0) + 1 (inc_a0) + # 7 (res_a0_b2) = 3 (res_a0_b1) * 2 (mult_a0) + 1 (inc_a0) + # 2 (res_a1_b0) = 2 (initial_a1) + # 8 (res_a1_b1) = 2 (res_a1_b0) * 3 (mult_a1) + 2 (inc_a1) + # 26 (res_a1_b2) = 8 (res_a1_b1) * 3 (mult_a1) + 2 (inc_a1) + expected = from_string(r""" + a\b b0 b1 b2 + a0 1 3 7 + a1 2 8 26""") + assert_larray_equal(res, expected) + + # both inc and mult defined but "constant" array (sequence axis not present) + # and a mult cell equal to 1 + initial = Array([1, 2], 'a=a0,a1') + mult = Array([2, 1], 'a=a0,a1') + inc = Array([3, 5], 'a=a0,a1') + res = sequence('b=b0..b2', initial=initial, mult=mult, inc=inc) + # 1 (res_a0_b0) = 1 (initial_a0) + # 5 (res_a0_b1) = 1 (res_a0_b0) * 2 (mult_a0) + 3 (inc_a0) + # 13 (res_a0_b2) = 5 (res_a0_b1) * 2 (mult_a0) + 3 (inc_a0) + # 2 (res_a1_b0) = 2 (initial_a1) + # 7 (res_a1_b1) = 2 (res_a1_b0) * 1 (mult_a1) + 5 (inc_a1) + # 12 (res_a1_b2) = 7 (res_a1_b1) * 1 (mult_a1) + 5 (inc_a1) + expected = from_string(r""" + a\b b0 b1 b2 + a0 1 5 13 + a1 2 7 12""") + assert_larray_equal(res, expected) + def test_sort_values(): # 1D arrays From 
6385a39dbaec840fdb47da5bccf5d8c239da3b73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?=
Date: Mon, 16 Mar 2026 16:44:50 +0100
Subject: [PATCH 21/28] FIX: added warning when arr[key] = value uses unsafe casting (fixes #1164)

---
 doc/source/changes/version_0_35_1.rst.inc | 75 ++++++++---------------
 larray/core/array.py | 57 ++++++++++++++++-
 larray/core/checked.py | 4 +-
 larray/tests/test_array.py | 44 +++++++++++--
 4 files changed, 119 insertions(+), 61 deletions(-)

diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc
index 0204c0990..18122033a 100644
--- a/doc/source/changes/version_0_35_1.rst.inc
+++ b/doc/source/changes/version_0_35_1.rst.inc
@@ -1,56 +1,38 @@
 .. py:currentmodule:: larray
 
-Syntax changes
-^^^^^^^^^^^^^^
-
-* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`).
-
-* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``.
-
-
-Backward incompatible changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-* other backward incompatible changes
-
-
-New features
-^^^^^^^^^^^^
-
-* added a feature (see the :ref:`miscellaneous section ` for details). It works on :ref:`api-axis` and
- :ref:`api-group` objects.
-
- Here is an example of the new feature:
-
- >>> arr = ndtest((2, 3))
- >>> arr
- a\b b0 b1 b2
- a0 0 1 2
- a1 3 4 5
+Miscellaneous improvements
+^^^^^^^^^^^^^^^^^^^^^^^^^^
 
- And it can also be used like this:
+* added warning when setting an array subset causes an unsafe type conversion
+ (which could lead to information loss). For example, when setting an integer
+ array with a float value, the float value is truncated to an integer
+ value, losing the decimal part. The behavior did not change (yet) but
+ a warning is now emitted (whether there is actual loss of information or
+ not). Closes :issue:`1164`.
- >>> arr = ndtest("a=a0..a2") + >>> arr = ndtest(3) >>> arr a a0 a1 a2 0 1 2 + >>> arr.dtype + dtype('int64') + >>> arr['a1'] = 3.9 + Setting a subset of an array with int64 dtype with a value with float dtype. -* added another feature in the editor (closes :editor_issue:`1`). - - .. note:: - - - It works for foo bar ! - - It does not work for foo baz ! - + It will be converted to int64 like in previous larray versions but this is + not a safe operation (some information could be lost in the conversion). -.. _misc: + If you want to keep doing this conversion and silence this warning, please + convert the value explicitly using value.astype(). -Miscellaneous improvements -^^^^^^^^^^^^^^^^^^^^^^^^^^ + This warning will become an error in a future version of larray. + >>> arr + a a0 a1 a2 + 0 3 2 -* added warning when doing unsafe implicit type conversions (for example from - float to integers) when setting CheckedSession fields (closes :issue:`1168`). +* added a similar warning when doing unsafe type conversions when setting + CheckedSession fields (closes :issue:`1168`). >>> a = Axis('a=a0,a1') >>> class MySession(CheckedSession): @@ -65,15 +47,8 @@ Miscellaneous improvements a a0 a1 0.0 1.6 >>> s.my_field = bad_value - Array 'my_field' was declared with dtype int64 but got value with float64 dtype. - - It will be converted to int64 like in previous larray versions but this is - not a safe operation (some information could be lost in the conversion). - - If you want to keep doing this conversion and silence this warning, please - convert the value explicitly using value.astype(). - - This warning will become an error in a future version of larray. 
+ Array 'my_field' was declared with dtype int64 but <...> (rest of warning + omitted for brevity) Fixes diff --git a/larray/core/array.py b/larray/core/array.py index 6ed9aafb1..22fc80c35 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -895,6 +895,21 @@ def align_arrays(values, join='outer', fill_value=nan, axes=None): for value, aligned_axes in zip(values, aligned_axis_collections)) + +UNSAFE_CAST_WARNING_TEMPLATE = """ +Setting a subset of an array with {expected_dtype} dtype with a value with \ +{value_dtype} dtype. + +It will be converted to {expected_dtype} like in previous larray versions but \ +this is +not a safe operation (some information could be lost in the conversion). + +If you want to keep doing this conversion and silence this warning, please +convert the value explicitly using value.astype(). + +This warning will become an error in a future version of larray.""" + + class Array(ABCArray): r""" An Array object represents a multidimensional, homogeneous array of fixed-size items with labeled axes. @@ -2245,7 +2260,42 @@ def __getitem__(self, key, collapse_slices=False, translate_key=True, points=Fal def __setitem__(self, key, value, collapse_slices=True, translate_key=True, points=False) -> None: raw_broadcasted_key, target_axes, _ = \ self.axes._key_to_raw_and_axes(key, collapse_slices, translate_key, points, wildcard=True) - if isinstance(value, Array): + expected_dtype = self.data.dtype + value_is_array = isinstance(value, Array) + if value_is_array or isinstance(value, (np.generic, np.ndarray)): + value_type = value.dtype + # We do not handle Iterable here because np.(as)array does + # not handle (iterate on) them. + # strings are Sequence but we do not want to convert them to arrays + elif (isinstance(value, Sequence) and + not isinstance(value, (bytes, str))): + value = np.asarray(value) + value_type = value.dtype + else: + # any other value, including Python scalars (int, float, str, etc.) 
+ # datetime objects are not considered scalars by numpy, but we want + # to allow them as values + value_type = type(value) + if value_type != expected_dtype: + if not np.can_cast(value_type, expected_dtype, 'safe'): + value_type_str = value_type.__name__ \ + if isinstance(value_type, type) else str(value_type) + msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + expected_dtype=expected_dtype, + value_dtype=value_type_str + ) + # TODO: in a later version, turn to an exception + warnings.warn(msg, category=FutureWarning, stacklevel=2) + # non numpy scalars cannot be cast using .astype directly + if np.isscalar(value) and not isinstance(value, np.generic): + # First cast to the corresponding numpy scalar type + # We do this instead of casting directly to expected_dtype + # in case Numpy decides to disallow unsafe casts in its + # scalar type init in the future + value = np.dtype(value_type).type(value) + value = value.astype(expected_dtype, casting='unsafe', + copy=False) + if value_is_array: # None target_axes can happen when setting a single "cell"/value with an Array (of size 1) if target_axes is not None: value = value.broadcast_with(target_axes, check_compatible=True) @@ -2271,6 +2321,7 @@ def __setitem__(self, key, value, collapse_slices=True, translate_key=True, poin raise ValueError(f"Value {extra_axes!s} {text} not present in target subset {axes!s}. A value can only " f"have the same axes or fewer axes than the subset being targeted") value = value.data + self.data[raw_broadcasted_key] = value def set(self, value, **kwargs) -> None: @@ -8874,7 +8925,7 @@ def sequence(axis, initial=0, inc=None, mult=None, func=None, axes=None, title=N M 3 7 15 31 F 4 14 44 134 >>> def modify(prev_value): - ... return prev_value / 2 + ... 
return prev_value // 2 >>> sequence(year, 8, func=modify) year 2016 2017 2018 2019 8 4 2 1 @@ -8984,7 +9035,7 @@ def array_or_full(value, seq_axis, initial): # FIXME: the assert is broken (not has_axis is not what we want) assert ((np.isscalar(inc) and inc != 0) or not has_axis(inc, axis)) and \ (np.isscalar(mult) or not has_axis(mult, axis)) - mult_array = array_or_full(mult, axis, 1.0) + mult_array = array_or_full(mult, axis, 1) cum_mult = mult_array.cumprod(axis)[axis.i[1:]] res[axis.i[0]] = initial diff --git a/larray/core/checked.py b/larray/core/checked.py index 12f154358..9c9d6b4ca 100644 --- a/larray/core/checked.py +++ b/larray/core/checked.py @@ -273,13 +273,13 @@ class CheckedSession(Session, BaseModel, metaclass=LArrayModelMetaclass): >>> # with the AGE and GENDER axes only >>> m.birth_rate = full((AGE, GENDER, TIME), fill_value=Array([0.045, 0.055], GENDER)) >>> # here 'new_births' have the AGE, GENDER and TIME axes instead of the AGE and GENDER axes only - >>> new_births = m.population['female', 2025] * m.birth_rate + >>> new_births = (m.population['female', 2025] * m.birth_rate).astype(int) >>> print(new_births.info) 11 x 2 x 11 age [11]: 0 1 2 ... 8 9 10 gender [2]: 'male' 'female' time [11]: 2020 2021 2022 ... 
2028 2029 2030 - dtype: float64 + dtype: int64 memory used: 1.89 Kb >>> # and the line below will crash >>> m.births[2025] = new_births # doctest: +NORMALIZE_WHITESPACE diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 9385193a3..d842ecf27 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -23,6 +23,7 @@ from_lists, from_string, from_frame, from_series, zip_array_values, zip_array_items, nan_to_num ) +from larray.core.array import UNSAFE_CAST_WARNING_TEMPLATE from larray.core.axis import ( _to_ticks, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg ) @@ -1430,7 +1431,7 @@ def test_setitem_larray(array, small_array): # 1) using a LGroup key as1_5_9 = a[[1, 5, 9]] - # a) value has exactly the same shape as the target slice + # a1) value has exactly the same shape as the target slice arr = array.copy() raw = array.data.copy() @@ -1438,6 +1439,20 @@ def test_setitem_larray(array, small_array): raw[[1, 5, 9]] = raw[[1, 5, 9]] + 25.0 assert_nparray_equal(arr.data, raw) + # a2) correct shape with safe (int to float) casting + arr[as1_5_9] = (array[as1_5_9] + 25).astype(int) + assert_nparray_equal(arr.data, raw) + + # a3) correct shape with unsafe (float to int) casting => warning + int_arr = array.astype(int) + msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + expected_dtype=int_arr.dtype, + value_dtype=array.dtype + ) + with must_warn(FutureWarning, msg): + int_arr[as1_5_9] = array[as1_5_9] + 25.0 + assert_nparray_equal(int_arr.data, raw) + # b) value has exactly the same shape but LGroup at a "wrong" positions arr = array.copy() arr[b[:], as1_5_9] = arr[as1_5_9] + 25.0 @@ -1601,22 +1616,39 @@ def test_setitem_ndarray(array): assert_nparray_equal(arr.data, raw) -def test_setitem_scalar(array): +def test_setitem_scalar(): """ Test Array.__setitem__(key, value) where value is a scalar. 
""" # a) list key (one dimension) + array = ndtest((2, 3, 4, 5)) arr = array.copy() raw = array.data.copy() - arr[[1, 5, 9]] = 42 - raw[[1, 5, 9]] = 42 + arr[['c0', 'c2', 'c3']] = 42 + raw[:, :, [0, 2, 3]] = 42 assert_nparray_equal(arr.data, raw) # b) full scalar key (ie set one cell) + # b.1) same order than axes arr = array.copy() raw = array.data.copy() - arr[0, 'd2', 'b1', 'c0'] = 42 - raw[0, 1, 0, 1] = 42 + arr['a0', 'b1', 'c0', 'd2'] = 42 + raw[0, 1, 0, 2] = 42 + assert_nparray_equal(arr.data, raw) + + # b.2) different order than axes + arr = array.copy() + arr['a0', 'd2', 'b1', 'c0'] = 42 + assert_nparray_equal(arr.data, raw) + + # c) one cell and unsafe type + arr = array.copy() + msg = UNSAFE_CAST_WARNING_TEMPLATE.format( + expected_dtype='int64', + value_dtype='float' + ) + with must_warn(FutureWarning, msg): + arr['a0', 'b1', 'c0', 'd2'] = 42.9 assert_nparray_equal(arr.data, raw) From 219098e45759d52bd0cbe29baeb8fc73d4b48239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 17 Mar 2026 13:59:25 +0100 Subject: [PATCH 22/28] CLN: indent --- larray/core/array.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index 22fc80c35..f3fb700b3 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -8861,15 +8861,16 @@ def sequence(axis, initial=0, inc=None, mult=None, func=None, axes=None, title=N initial : scalar or Array, optional Value for the first label of axis. Defaults to 0. inc : scalar, Array, optional - Value to increment the previous value by. Defaults to 1 unless mult is provided (in which case it defaults - to 0). + Value to increment the previous value by. Defaults to 1 unless mult is + provided (in which case it defaults to 0). mult : scalar, Array, optional Value to multiply the previous value by. Defaults to None. func : function/callable, optional Function to apply to the previous value. Defaults to None. 
Note that this is much slower than using inc and/or mult. axes : int, tuple of int or tuple/list/AxisCollection of Axis, optional - Axes of the result. Defaults to the union of axes present in other arguments. + Axes of the result. Defaults to the union of axes present in other + arguments. title : str, optional Deprecated. See 'meta' below. meta : list of pairs or dict or Metadata, optional From 9e3d2d59f4ec78a0292e470e28ef415c0f6196a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 17 Mar 2026 14:00:30 +0100 Subject: [PATCH 23/28] FIX: fixed slightly buggy assert --- larray/core/array.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index f3fb700b3..b906254e7 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -9033,9 +9033,10 @@ def array_or_full(value, seq_axis, initial): res[axis.i[1:]] = mult_array.cumprod(axis)[axis.i[1:]] # both inc and mult defined but constant (scalars or axis not present) elif not has_axis(inc, axis) and not has_axis(mult, axis): - # FIXME: the assert is broken (not has_axis is not what we want) - assert ((np.isscalar(inc) and inc != 0) or not has_axis(inc, axis)) and \ - (np.isscalar(mult) or not has_axis(mult, axis)) + scalar_inc = np.isscalar(inc) and inc != 0 + scalar_mult = np.isscalar(mult) and mult != 1 + assert scalar_inc or (isinstance(inc, Array) and axis not in inc.axes) + assert scalar_mult or (isinstance(mult, Array) and axis not in mult.axes) mult_array = array_or_full(mult, axis, 1) cum_mult = mult_array.cumprod(axis)[axis.i[1:]] res[axis.i[0]] = initial From f49043c9a64ba864f1d8ae8c5e959d8b88c9b9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Mon, 23 Mar 2026 15:02:17 +0100 Subject: [PATCH 24/28] DOC: added comment --- larray/core/group.py | 1 + 1 file changed, 1 insertion(+) diff --git a/larray/core/group.py b/larray/core/group.py index c8c39ed27..92fbd5d8d 100644 --- 
a/larray/core/group.py +++ b/larray/core/group.py @@ -885,6 +885,7 @@ def __len__(self) -> int: # if isinstance(value, (tuple, list, Array, np.ndarray, str)): if hasattr(value, '__len__'): return len(value) + # this case happens for groups without axis and a slice key elif isinstance(value, slice): start, stop = value.start, value.stop # not using stop - start because that does not work for string bounds From a6196eea43b1bc625433ec2ea6409d4bcbf71060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 24 Mar 2026 14:09:05 +0100 Subject: [PATCH 25/28] FIX: silent bad results in Group arithmetics (fixes #1173) --- doc/source/changes/version_0_35_1.rst.inc | 3 +++ larray/core/group.py | 7 ++++--- larray/tests/test_group.py | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index 18122033a..e4ceda91a 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -54,6 +54,9 @@ Miscellaneous improvements Fixes ^^^^^ +* fixed silent bad result in some arithmetic operations using Group + instances when using Numpy 2.1 or later (closes :issue:`1173`). 
+ * fixed loading data files with all the following conditions when using Pandas >= 3.0: mixed type columns, some combination of labels in index columns not being present in the data and a non-NaN fill_value diff --git a/larray/core/group.py b/larray/core/group.py index 92fbd5d8d..43a3ee59c 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -1048,7 +1048,8 @@ def _binop(opname): # TODO: implement this in a delayed fashion for axes references def opmethod(self, other): other_value = other.eval() if isinstance(other, Group) else other - return getattr(self.eval(), op_fullname)(other_value) + self_value = self.eval() + return getattr(self_value, op_fullname)(other_value) opmethod.__name__ = op_fullname return opmethod @@ -1504,8 +1505,8 @@ def __dir__(self) -> List[str]: return list(set(dir(self.eval())) | set(attributes) | set(dir(self.__class__))) def __getattr__(self, key) -> Scalar: - if key == '__array_struct__': - raise AttributeError("'Group' object has no attribute '__array_struct__'") + if key in {'__array_struct__', '__array_interface__'}: + raise AttributeError(f"'Group' object has no attribute '{key}'") else: return getattr(self.eval(), key) diff --git a/larray/tests/test_group.py b/larray/tests/test_group.py index 4fbe3b41e..da7b64e59 100644 --- a/larray/tests/test_group.py +++ b/larray/tests/test_group.py @@ -113,6 +113,23 @@ def test_eq_lgroup(lgroups): assert lgroups.list_named == ['P01', 'P03', 'P04'] +def test_group_arithmetic(): + time = Axis('time=2000..2040') + t = time.i[20] # 2020 + r1 = t + 65 + assert r1 == 2085 + + # issue #1173 + age = Axis('age=65..75') + a = age.i[5] # 70 + r2 = r1 - a # should be 2015 + # issue was failing because r1 is a numpy type (int64) and a is a non-numpy + # type (Group) which has a __array_interface__ method forwarded to the + # __array_interface__ of its value (a.eval() which is another numpy int64) + # but no __array_struct__ method. See issue for details. 
+ assert r2 == 2015 # (2085 - 70) + + def test_getitem_lgroup(): axis = Axis("a=a0,a1") assert axis['a0'][0] == 'a' From 24291277d229b165da049d294bc129395a7c6412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 24 Mar 2026 14:41:33 +0100 Subject: [PATCH 26/28] CLN: move array ops to separate module --- larray/__init__.py | 7 +- larray/core/array.py | 198 ++------------------------------------ larray/core/array_ops.py | 203 +++++++++++++++++++++++++++++++++++++++ larray/core/session.py | 2 +- larray/extra/ipfp.py | 10 +- 5 files changed, 221 insertions(+), 199 deletions(-) create mode 100644 larray/core/array_ops.py diff --git a/larray/__init__.py b/larray/__init__.py index b3b30ecf8..c5a6dca48 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -5,8 +5,7 @@ from larray.core.group import Group, LGroup, LSet, IGroup, union from larray.core.array import (Array, zeros, zeros_like, ones, ones_like, empty, empty_like, full, full_like, sequence, labels_array, ndtest, asarray, identity, diag, - eye, all, any, sum, prod, cumsum, cumprod, min, max, mean, ptp, var, - std, median, percentile, stack, zip_array_values, zip_array_items) + eye, stack, zip_array_values, zip_array_items) from larray.core.session import Session, local_arrays, global_arrays, arrays from larray.core.checked import CheckedArray, CheckedSession, CheckedParameters from larray.core.constants import nan, inf, pi, e, euler_gamma @@ -21,6 +20,10 @@ convolve, clip, sqrt, absolute, fabs, sign, fmax, fmin, real_if_close, interp, isinf, inverse) from larray.core.misc import isscalar +from larray.core.array_ops import ( + all, any, sum, prod, cumsum, cumprod, min, max, mean, median, percentile, + ptp, var, std +) from larray.inout.misc import from_lists, from_string from larray.inout.pandas import from_frame, from_series diff --git a/larray/core/array.py b/larray/core/array.py index b906254e7..57cccaced 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -28,7 
+28,6 @@ from itertools import product, chain, groupby from collections.abc import Iterable, Sequence from pathlib import Path -import builtins import functools import warnings @@ -56,6 +55,7 @@ from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis # noqa: F401 from larray.core.axis import align_axis_collections from larray.core.plot import PlotObject +from larray.core.array_ops import sum, _np_op from larray.util.misc import (table2str, size2str, ReprString, float_error_handler_factory, light_product, common_dtype, renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip, @@ -64,181 +64,6 @@ from larray.util.types import Scalar -def all(values, axis=None) -> Union['Array', Scalar]: - r""" - Test whether all array elements along a given axis evaluate to True. - - See Also - -------- - Array.all - """ - if isinstance(values, Array): - return values.all(axis) - else: - return builtins.all(values) - - -def any(values, axis=None) -> Union['Array', Scalar]: - r""" - Test whether any array elements along a given axis evaluate to True. - - See Also - -------- - Array.any - """ - if isinstance(values, Array): - return values.any(axis) - else: - return builtins.any(values) - - -# commutative modulo float precision errors -def sum(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Sum of array elements. - - See Also - -------- - Array.sum - """ - # XXX: we might want to be more aggressive here (more types to convert), however, generators should still be - # computed via the builtin. - if isinstance(array, (np.ndarray, list)): - array = Array(array) - if isinstance(array, Array): - return array.sum(*args, **kwargs) - else: - return builtins.sum(array, *args, **kwargs) - - -def prod(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Product of array elements. 
- - See Also - -------- - Array.prod - """ - return array.prod(*args, **kwargs) - - -def cumsum(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Return the cumulative sum of array elements. - - See Also - -------- - Array.cumsum - """ - return array.cumsum(*args, **kwargs) - - -def cumprod(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Return the cumulative product of array elements. - - See Also - -------- - Array.cumprod - """ - return array.cumprod(*args, **kwargs) - - -def min(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Minimum of array elements. - - See Also - -------- - Array.min - """ - if isinstance(array, Array): - return array.min(*args, **kwargs) - else: - return builtins.min(array, *args, **kwargs) - - -def max(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Maximum of array elements. - - See Also - -------- - Array.max - """ - if isinstance(array, Array): - return array.max(*args, **kwargs) - else: - return builtins.max(array, *args, **kwargs) - - -def mean(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Compute the arithmetic mean. - - See Also - -------- - Array.mean - """ - return array.mean(*args, **kwargs) - - -def median(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Compute the median. - - See Also - -------- - Array.median - """ - return array.median(*args, **kwargs) - - -def percentile(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Compute the qth percentile of the data along the specified axis. - - See Also - -------- - Array.percentile - """ - return array.percentile(*args, **kwargs) - - -# not commutative -def ptp(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Return the range of values (maximum - minimum). - - See Also - -------- - Array.ptp - """ - return array.ptp(*args, **kwargs) - - -def var(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Compute the variance. 
- - See Also - -------- - Array.var - """ - return array.var(*args, **kwargs) - - -def std(array, *args, **kwargs) -> Union['Array', Scalar]: - r""" - Compute the standard deviation. - - See Also - -------- - Array.std - """ - return array.std(*args, **kwargs) - - def concat(arrays, axis=0, dtype=None): r"""Concatenate arrays along axis. @@ -3064,6 +2889,7 @@ def with_total(self, *args, op=sum, label='total', **kwargs) -> 'Array': Examples -------- + >>> from larray import mean >>> arr = ndtest("gender=M,F;time=2013..2016") >>> arr gender\time 2013 2014 2015 2016 @@ -3103,18 +2929,6 @@ def with_total(self, *args, op=sum, label='total', **kwargs) -> 'Array': >>> # arr.with_total('time[:2014] >> before_2015; time[2015:] >> after_2015') """ # TODO: make label default to op.__name__ - npop = { - sum: np.sum, - prod: np.prod, - min: np.min, - max: np.max, - mean: np.mean, - ptp: np.ptp, - var: np.var, - std: np.std, - median: np.median, - percentile: np.percentile, - } # TODO: commutative should be known for usual ops operations = self._prepare_aggregate(op, args, kwargs, False, stack_depth=2) res = self @@ -3122,8 +2936,9 @@ def with_total(self, *args, op=sum, label='total', **kwargs) -> 'Array': # only copied once for axis in operations: # TODO: append/extend first with an empty array then _aggregate with out= + npfunc = _np_op[op] if self.axes.isaxis(axis): - value = res._axis_aggregate(npop[op], (axis,), keepaxes=label) + value = res._axis_aggregate(npfunc, (axis,), keepaxes=label) else: # groups if not isinstance(axis, tuple): @@ -3131,7 +2946,7 @@ def with_total(self, *args, op=sum, label='total', **kwargs) -> 'Array': axis = (axis,) lgkey = axis axis = lgkey[0].axis - value = res._aggregate(npop[op], (lgkey,)) + value = res._aggregate(npfunc, (lgkey,)) res = res.append(axis, value) return res @@ -6014,7 +5829,8 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False) -> b return False try: axes_equal = self.axes == other.axes if check_axes 
else True - return axes_equal and all(self.eq(other, rtol=rtol, atol=atol, nans_equal=nans_equal)) + eq = self.eq(other, rtol=rtol, atol=atol, nans_equal=nans_equal) + return axes_equal and eq.all() except ValueError: return False diff --git a/larray/core/array_ops.py b/larray/core/array_ops.py new file mode 100644 index 000000000..ee9ef69be --- /dev/null +++ b/larray/core/array_ops.py @@ -0,0 +1,203 @@ +import builtins +import numpy as np +from typing import Union +from larray.util.types import Scalar + +# Note: We use TYPE_CHECKING to avoid circular imports +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from larray.core.array import Array + + +def all(values, axis=None) -> Union['Array', Scalar]: + r""" + Test whether all array elements along a given axis evaluate to True. + + See Also + -------- + Array.all + """ + from larray.core.array import Array + if isinstance(values, Array): + return values.all(axis) + else: + return builtins.all(values) + + +def any(values, axis=None) -> Union['Array', Scalar]: + r""" + Test whether any array elements along a given axis evaluate to True. + + See Also + -------- + Array.any + """ + from larray.core.array import Array + if isinstance(values, Array): + return values.any(axis) + else: + return builtins.any(values) + + +# commutative modulo float precision errors +def sum(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Sum of array elements. + + See Also + -------- + Array.sum + """ + from larray.core.array import Array + # XXX: we might want to be more aggressive here (more types to convert), + # however, generators should still be computed via the builtin. + if isinstance(array, (np.ndarray, list)): + array = Array(array) + if isinstance(array, Array): + return array.sum(*args, **kwargs) + else: + return builtins.sum(array, *args, **kwargs) + + +def prod(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Product of array elements. 
+ + See Also + -------- + Array.prod + """ + return array.prod(*args, **kwargs) + + +def cumsum(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Return the cumulative sum of array elements. + + See Also + -------- + Array.cumsum + """ + return array.cumsum(*args, **kwargs) + + +def cumprod(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Return the cumulative product of array elements. + + See Also + -------- + Array.cumprod + """ + return array.cumprod(*args, **kwargs) + + +def min(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Minimum of array elements. + + See Also + -------- + Array.min + """ + from larray.core.array import Array + if isinstance(array, Array): + return array.min(*args, **kwargs) + else: + return builtins.min(array, *args, **kwargs) + + +def max(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Maximum of array elements. + + See Also + -------- + Array.max + """ + from larray.core.array import Array + if isinstance(array, Array): + return array.max(*args, **kwargs) + else: + return builtins.max(array, *args, **kwargs) + + +def mean(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Compute the arithmetic mean. + + See Also + -------- + Array.mean + """ + return array.mean(*args, **kwargs) + + +def median(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Compute the median. + + See Also + -------- + Array.median + """ + return array.median(*args, **kwargs) + + +def percentile(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Compute the qth percentile of the data along the specified axis. + + See Also + -------- + Array.percentile + """ + return array.percentile(*args, **kwargs) + + +# not commutative +def ptp(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Return the range of values (maximum - minimum). 
+ + See Also + -------- + Array.ptp + """ + return array.ptp(*args, **kwargs) + + +def var(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Compute the variance. + + See Also + -------- + Array.var + """ + return array.var(*args, **kwargs) + + +def std(array, *args, **kwargs) -> Union['Array', Scalar]: + r""" + Compute the standard deviation. + + See Also + -------- + Array.std + """ + return array.std(*args, **kwargs) + + +_np_op = { + sum: np.sum, + prod: np.prod, + min: np.min, + max: np.max, + mean: np.mean, + ptp: np.ptp, + var: np.var, + std: np.std, + median: np.median, + percentile: np.percentile, +} diff --git a/larray/core/session.py b/larray/core/session.py index 247366c20..20c24a400 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -1286,7 +1286,7 @@ def equals(self, other, rtol=0, atol=0, nans_equal=False) -> bool: >>> s1.equals(s2) False """ - return all(self.element_equals(other, rtol=rtol, atol=atol, nans_equal=nans_equal)) + return self.element_equals(other, rtol=rtol, atol=atol, nans_equal=nans_equal).all() def transpose(self, *args) -> 'Session': r"""Reorder axes of arrays in session, ignoring missing axes for each array. 
diff --git a/larray/extra/ipfp.py b/larray/extra/ipfp.py index a471f42b3..d5310ffb8 100644 --- a/larray/extra/ipfp.py +++ b/larray/extra/ipfp.py @@ -1,7 +1,7 @@ import math from collections import deque -from larray.core.array import Array, asarray, ones, any +from larray.core.array import Array, asarray, ones import numpy as np @@ -222,19 +222,19 @@ def has_anonymous_axes(a): f"{axes[0]} (axis {a.axes.index(axes[0])}): {axis_total} vs {axis0_total}") negative = a < 0 - if any(negative): + if negative.any(): raise ValueError(f"negative value(s) found:\n{badvalues(a, negative)}") for axis, axis_target_sum in zip(axes, target_sums): axis_idx = a.axes.index(axis) axis_sum = a.sum(axis) bad = (axis_sum == 0) & (axis_target_sum != 0) - if any(bad): + if bad.any(): raise ValueError(f"found all zero values sum along {axis.name} (axis {axis_idx}) but non zero target sum:\n" f"{badvalues(axis_target_sum, bad)}") bad = (axis_sum != 0) & (axis_target_sum == 0) - if any(bad): + if bad.any(): if nzvzs in {'warn', 'raise'}: msg = f"found Non Zero Values but Zero target Sum (nzvzs) along {axis.name} (axis {axis_idx})" if nzvzs == 'raise': @@ -245,7 +245,7 @@ def has_anonymous_axes(a): a[bad] = 0 # verify we did fix the problem - assert not any((a.sum(axis) != 0) & (axis_target_sum == 0)) + assert not ((a.sum(axis) != 0) & (axis_target_sum == 0)).any() lastdiffs = deque([float('nan')], maxlen=stepstoabort) From a4c1a6232c82621fc4ded9b9ca9c70e1a1d4e451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Wed, 25 Mar 2026 09:37:09 +0100 Subject: [PATCH 27/28] FIX: fixed from_frame docstring we documented the inverse of the actual behavior :( --- larray/inout/pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index d6259b39f..72cf02cff 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -259,7 +259,7 @@ def from_frame(df, cartesian_prod : bool, optional Whether to expand the 
dataframe to a cartesian product dataframe as needed by Array. This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already - well-formed. If True, arguments `sort_rows` and `sort_columns` must be set to False. + well-formed. If False, arguments `sort_rows` and `sort_columns` must also be False. Defaults to True. copy : bool, optional Whether to copy the data from the DataFrame. Defaults to True. From f24c25212b41386fe7e659bf794ba491f338a4aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Wed, 3 Dec 2025 17:07:11 +0100 Subject: [PATCH 28/28] MAINT: added support for Python 3.14 in CI (closes #1147) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b205e5def..5b1b20d8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: fail-fast: false matrix: # os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] defaults: run: