From f81e2abd9273a0ac959fe14ded1cea68dba2e314 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 3 Mar 2026 15:37:44 -0500 Subject: [PATCH 1/5] add array variant to scalar value Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 26 +- .../fixed_size_list/vtable/operations.rs | 14 +- .../src/arrays/list/vtable/operations.rs | 16 +- .../src/arrays/listview/vtable/operations.rs | 14 +- vortex-array/src/builders/fixed_size_list.rs | 16 +- vortex-array/src/builders/list.rs | 41 +-- vortex-array/src/builders/listview.rs | 40 +-- vortex-array/src/scalar/constructor.rs | 29 ++ vortex-array/src/scalar/downcast.rs | 32 +-- vortex-array/src/scalar/proto.rs | 168 +++++++++--- vortex-array/src/scalar/scalar_impl.rs | 8 +- vortex-array/src/scalar/scalar_value.rs | 149 +++++++++-- vortex-array/src/scalar/typed_view/list.rs | 251 +++++++++++++----- vortex-array/src/scalar/typed_view/struct_.rs | 2 +- vortex-array/src/scalar/validate.rs | 14 + vortex-array/src/serde.rs | 26 ++ vortex-ipc/src/messages/decoder.rs | 2 +- vortex-proto/proto/scalar.proto | 7 + vortex-proto/public-api.lock | 40 +++ vortex-proto/src/generated/vortex.scalar.rs | 13 +- 20 files changed, 668 insertions(+), 240 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index e4a72149f27..6e7abdd6420 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -11502,6 +11502,8 @@ pub fn vortex_array::variants::PrimitiveTyped<'_>::index_lt(&self, idx: usize, e pub enum vortex_array::scalar::ScalarValue +pub vortex_array::scalar::ScalarValue::Array(vortex_array::ArrayRef) + pub vortex_array::scalar::ScalarValue::Binary(vortex_buffer::ByteBuffer) pub vortex_array::scalar::ScalarValue::Bool(bool) @@ -11522,10 +11524,10 @@ pub fn vortex_array::scalar::ScalarValue::as_bool(&self) -> bool pub fn vortex_array::scalar::ScalarValue::as_decimal(&self) -> &vortex_array::scalar::DecimalValue -pub fn vortex_array::scalar::ScalarValue::as_list(&self) -> &[core::option::Option] - pub fn vortex_array::scalar::ScalarValue::as_primitive(&self) -> &vortex_array::scalar::PValue +pub fn vortex_array::scalar::ScalarValue::as_struct_fields(&self) -> &[core::option::Option] + pub fn vortex_array::scalar::ScalarValue::as_utf8(&self) -> &vortex_buffer::string::BufferString pub fn vortex_array::scalar::ScalarValue::into_binary(self) -> vortex_buffer::ByteBuffer @@ -11534,10 +11536,10 @@ pub fn vortex_array::scalar::ScalarValue::into_bool(self) -> bool pub fn vortex_array::scalar::ScalarValue::into_decimal(self) -> vortex_array::scalar::DecimalValue -pub fn vortex_array::scalar::ScalarValue::into_list(self) -> alloc::vec::Vec> - pub fn vortex_array::scalar::ScalarValue::into_primitive(self) -> vortex_array::scalar::PValue +pub fn vortex_array::scalar::ScalarValue::into_struct_fields(self) -> alloc::vec::Vec> + pub fn vortex_array::scalar::ScalarValue::into_utf8(self) -> vortex_buffer::string::BufferString impl vortex_array::scalar::ScalarValue @@ -11560,7 +11562,7 @@ impl core::cmp::Eq for vortex_array::scalar::ScalarValue impl core::cmp::PartialEq for vortex_array::scalar::ScalarValue -pub fn vortex_array::scalar::ScalarValue::eq(&self, other: &vortex_array::scalar::ScalarValue) -> bool +pub fn vortex_array::scalar::ScalarValue::eq(&self, other: &Self) -> bool impl core::cmp::PartialOrd for vortex_array::scalar::ScalarValue @@ -11732,9 +11734,7 @@ pub fn vortex_array::scalar::ScalarValue::fmt(&self, f: &mut core::fmt::Formatte impl core::hash::Hash for vortex_array::scalar::ScalarValue -pub fn vortex_array::scalar::ScalarValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) - -impl core::marker::StructuralPartialEq for vortex_array::scalar::ScalarValue +pub fn vortex_array::scalar::ScalarValue::hash(&self, state: &mut H) impl core::convert::From> for vortex_array::scalar::ScalarValue where T: vortex_array::dtype::NativeDType, vortex_array::scalar::Scalar: core::convert::From @@ -12010,6 +12010,8 @@ pub struct vortex_array::scalar::ListScalar<'a> impl<'a> vortex_array::scalar::ListScalar<'a> +pub fn vortex_array::scalar::ListScalar<'a>::as_array(&self) -> core::option::Option<&'a vortex_array::ArrayRef> + pub fn vortex_array::scalar::ListScalar<'a>::dtype(&self) -> &'a vortex_array::dtype::DType pub fn vortex_array::scalar::ListScalar<'a>::element(&self, idx: usize) -> core::option::Option @@ -12230,6 +12232,8 @@ pub fn vortex_array::scalar::Scalar::fixed_size_list(element_dtype: impl core::c pub fn vortex_array::scalar::Scalar::list(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::list_array(parent_dtype: vortex_array::dtype::DType, elem_list: vortex_array::ArrayRef) -> Self + pub fn vortex_array::scalar::Scalar::list_empty(element_dtype: alloc::sync::Arc, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::scalar::Scalar::primitive>(value: T, nullability: vortex_array::dtype::Nullability) -> Self @@ -15802,6 +15806,12 @@ impl core::clone::Clone for vortex_array::serde::ArrayParts pub fn vortex_array::serde::ArrayParts::clone(&self) -> vortex_array::serde::ArrayParts +impl core::convert::TryFrom<&[u8]> for vortex_array::serde::ArrayParts + +pub type vortex_array::serde::ArrayParts::Error = vortex_error::VortexError + +pub fn vortex_array::serde::ArrayParts::try_from(value: &[u8]) -> core::result::Result + impl core::convert::TryFrom for vortex_array::serde::ArrayParts pub type vortex_array::serde::ArrayParts::Error = vortex_error::VortexError diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs index 3c57bfaac11..baf29d2b592 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs @@ -6,22 +6,14 @@ use vortex_error::VortexResult; use crate::arrays::FixedSizeListArray; use crate::arrays::FixedSizeListVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for FixedSizeListVTable { fn scalar_at(array: &FixedSizeListArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. let list = array.fixed_size_list_elements_at(index)?; - let children_elements: Vec = (0..list.len()) - .map(|i| list.scalar_at(i)) - .collect::>()?; - - debug_assert_eq!(children_elements.len(), array.list_size() as usize); - - Ok(Scalar::fixed_size_list( - list.dtype().clone(), - children_elements, - array.dtype().nullability(), - )) + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/arrays/list/vtable/operations.rs b/vortex-array/src/arrays/list/vtable/operations.rs index c326786a510..3459557d97a 100644 --- a/vortex-array/src/arrays/list/vtable/operations.rs +++ b/vortex-array/src/arrays/list/vtable/operations.rs @@ -1,27 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - use vortex_error::VortexResult; use crate::arrays::ListArray; use crate::arrays::ListVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for ListVTable { fn scalar_at(array: &ListArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. - let elems = array.list_elements_at(index)?; - let scalars: Vec = (0..elems.len()) - .map(|i| elems.scalar_at(i)) - .collect::>()?; - - Ok(Scalar::list( - Arc::new(elems.dtype().clone()), - scalars, - array.dtype().nullability(), - )) + let list = array.list_elements_at(index)?; + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/arrays/listview/vtable/operations.rs b/vortex-array/src/arrays/listview/vtable/operations.rs index 60bfc873548..f953b2769da 100644 --- a/vortex-array/src/arrays/listview/vtable/operations.rs +++ b/vortex-array/src/arrays/listview/vtable/operations.rs @@ -1,27 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - use vortex_error::VortexResult; use crate::arrays::ListViewArray; use crate::arrays::ListViewVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for ListViewVTable { fn scalar_at(array: &ListViewArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. let list = array.list_elements_at(index)?; - let children: Vec = (0..list.len()) - .map(|i| list.scalar_at(i)) - .collect::>()?; - - Ok(Scalar::list( - Arc::new(list.dtype().clone()), - children, - array.dtype.nullability(), - )) + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index 3ce90ad339e..f7ac75ee978 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -108,11 +108,10 @@ impl FixedSizeListBuilder { /// /// [`ListArray`]: crate::arrays::ListArray pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - let Some(elements) = value.elements() else { - // If `elements` is `None`, then the `value` is a null value. + if value.is_null() { self.append_null(); return Ok(()); - }; + } if value.len() != self.list_size() as usize { vortex_bail!( @@ -123,9 +122,16 @@ impl FixedSizeListBuilder { ); } + // Fast-path: if the list scalar is backed by an array, append the entire thing directly. + if let Some(array) = value.as_array() { + return self.append_array_as_list(array); + } + + // Otherwise we must append the individual scalars. + let elements = value + .elements() + .vortex_expect("non-null ListScalar must have elements"); for scalar in elements { - // TODO(connor): This is slow, we should be able to append multiple values at once, or - // the list scalar should hold an Array self.elements_builder.append_scalar(&scalar)?; } self.nulls.append_non_null(); diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index 643402b6555..aaabacd1c22 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -6,7 +6,6 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_mask::Mask; @@ -111,28 +110,30 @@ impl ListBuilder { /// Appends a list `value` to the builder. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - match value.elements() { - None => { - if self.dtype.nullability() == NonNullable { - vortex_bail!("Cannot append null value to non-nullable list"); - } - self.append_null(); - } - Some(elements) => { - for scalar in elements { - // TODO(connor): This is slow, we should be able to append multiple values at - // once, or the list scalar should hold an Array - self.elements_builder.append_scalar(&scalar)?; - } + if value.is_null() { + self.append_null(); + return Ok(()); + } - self.nulls.append_non_null(); - self.offsets_builder.append_value( - O::from_usize(self.elements_builder.len()) - .vortex_expect("Failed to convert from usize to O"), - ); - } + // Fast-path: if the list scalar is backed by an array, append the entire thing directly. + if let Some(array) = value.as_array() { + return self.append_array_as_list(array); + } + + // Otherwise, we must append the individual scalars. + let elements = value + .elements() + .vortex_expect("non-null ListScalar must have elements"); + for scalar in elements { + self.elements_builder.append_scalar(&scalar)?; } + self.nulls.append_non_null(); + self.offsets_builder.append_value( + O::from_usize(self.elements_builder.len()) + .vortex_expect("Failed to convert from usize to O"), + ); + Ok(()) } diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index dc3509c55d9..ba1199a02ad 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -152,15 +152,20 @@ impl ListViewBuilder { /// This method extends the value builder with the provided values and records /// the offset and size of the new list. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - let Some(elements) = value.elements() else { - // If `elements` is `None`, then the `value` is a null value. - vortex_ensure!( - self.dtype.is_nullable(), - "Cannot append null value to non-nullable list builder" - ); + if value.is_null() { self.append_null(); return Ok(()); - }; + } + + // Fast-path: if the list scalar is backed by an array, append the entire thing directly. + if let Some(array) = value.as_array() { + return self.append_array_as_list(array); + } + + // Otherwise, we must append the individual scalars. + let elements = value + .elements() + .vortex_expect("non-null ListScalar must have elements"); let curr_offset = self.elements_builder.len(); let num_elements = elements.len(); @@ -663,27 +668,6 @@ mod tests { ); } - #[test] - fn test_error_append_null_to_non_nullable() { - let dtype: Arc = Arc::new(I32.into()); - let mut builder = - ListViewBuilder::::with_capacity(dtype.clone(), NonNullable, 0, 0); - - // Create a null list with nullable type (since Scalar::null requires nullable type). - let null_scalar = Scalar::null(DType::List(dtype, Nullable)); - let null_list = null_scalar.as_list(); - - // This should fail because we're trying to append a null to a non-nullable builder. - let result = builder.append_value(null_list); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("null value to non-nullable") - ); - } - #[test] fn test_append_array_as_list() { use vortex_buffer::buffer; diff --git a/vortex-array/src/scalar/constructor.rs b/vortex-array/src/scalar/constructor.rs index dd10b4e0e90..d673707337b 100644 --- a/vortex-array/src/scalar/constructor.rs +++ b/vortex-array/src/scalar/constructor.rs @@ -10,6 +10,7 @@ use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::vortex_panic; +use crate::ArrayRef; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativePType; @@ -101,6 +102,34 @@ impl Scalar { .vortex_expect("unable to construct a binary `Scalar`") } + /// Creates a new list scalar using the [`DType`] of the parent array and a child array + /// representing the list as an array. + /// + /// # Panics + /// + /// Panics if the parent dtype is not a `List` or `FixedSixeList` with an `elem_dtype` equal to + /// the `elem_list`'s [`DType`]. + /// + /// Also panics if the parent dtype is a `FixedSizedList` but the `list_size` is not equal to + /// the array length. + pub fn list_array(parent_dtype: DType, elem_list: ArrayRef) -> Self { + match &parent_dtype { + DType::List(elem_dtype, _) => { + assert_eq!(elem_dtype.as_ref(), elem_list.dtype()); + } + DType::FixedSizeList(elem_dtype, list_size, _) => { + assert_eq!(elem_dtype.as_ref(), elem_list.dtype()); + assert_eq!(elem_list.len(), *list_size as usize); + } + _ => vortex_panic!("expected List or FixedSizeList dtype, got {parent_dtype}"), + }; + + let scalar_value = ScalarValue::Array(elem_list); + + // SAFETY: We just checked that the scalar value is valid for this dtype. + unsafe { Scalar::new_unchecked(parent_dtype, Some(scalar_value)) } + } + /// Creates a new list scalar with the given element type and children. /// /// # Panics diff --git a/vortex-array/src/scalar/downcast.rs b/vortex-array/src/scalar/downcast.rs index 6de05dab4e3..92ff3a7afc2 100644 --- a/vortex-array/src/scalar/downcast.rs +++ b/vortex-array/src/scalar/downcast.rs @@ -119,7 +119,8 @@ impl Scalar { /// /// # Panics /// - /// Panics if the scalar does not have a [`List`](crate::dtype::DType::List) or [`FixedSizeList`](crate::dtype::DType::FixedSizeList) type. + /// Panics if the scalar does not have a [`List`](crate::dtype::DType::List) or + /// [`FixedSizeList`](crate::dtype::DType::FixedSizeList) type. pub fn as_list(&self) -> ListScalar<'_> { self.as_list_opt() .vortex_expect("Failed to convert scalar to list") @@ -156,7 +157,7 @@ impl Scalar { } impl ScalarValue { - /// Returns the boolean value, panicking if the value is not a [`Bool`][ScalarValue::Bool]. + /// Returns the boolean value, panicking if the value is not a [`Bool`](ScalarValue::Bool). pub fn as_bool(&self) -> bool { match self { ScalarValue::Bool(b) => *b, @@ -165,7 +166,7 @@ impl ScalarValue { } /// Returns the primitive value, panicking if the value is not a - /// [`Primitive`][ScalarValue::Primitive]. + /// [`Primitive`](ScalarValue::Primitive). pub fn as_primitive(&self) -> &PValue { match self { ScalarValue::Primitive(p) => p, @@ -174,7 +175,7 @@ impl ScalarValue { } /// Returns the decimal value, panicking if the value is not a - /// [`Decimal`][ScalarValue::Decimal]. + /// [`Decimal`](ScalarValue::Decimal). pub fn as_decimal(&self) -> &DecimalValue { match self { ScalarValue::Decimal(d) => d, @@ -182,7 +183,7 @@ impl ScalarValue { } } - /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`][ScalarValue::Utf8]. + /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`](ScalarValue::Utf8). pub fn as_utf8(&self) -> &BufferString { match self { ScalarValue::Utf8(s) => s, @@ -190,7 +191,7 @@ impl ScalarValue { } } - /// Returns the binary value, panicking if the value is not a [`Binary`][ScalarValue::Binary]. + /// Returns the binary value, panicking if the value is not a [`Binary`](ScalarValue::Binary). pub fn as_binary(&self) -> &ByteBuffer { match self { ScalarValue::Binary(b) => b, @@ -198,15 +199,16 @@ impl ScalarValue { } } - /// Returns the list elements, panicking if the value is not a [`List`][ScalarValue::List]. - pub fn as_list(&self) -> &[Option] { + /// Returns the list elements as struct elements, panicking if the value is not a + /// [`List`](ScalarValue::List). + pub fn as_struct_fields(&self) -> &[Option] { match self { ScalarValue::List(elements) => elements, _ => vortex_panic!("ScalarValue is not a List"), } } - /// Returns the boolean value, panicking if the value is not a [`Bool`][ScalarValue::Bool]. + /// Returns the boolean value, panicking if the value is not a [`Bool`](ScalarValue::Bool). pub fn into_bool(self) -> bool { match self { ScalarValue::Bool(b) => b, @@ -215,7 +217,7 @@ impl ScalarValue { } /// Returns the primitive value, panicking if the value is not a - /// [`Primitive`][ScalarValue::Primitive]. + /// [`Primitive`](ScalarValue::Primitive). pub fn into_primitive(self) -> PValue { match self { ScalarValue::Primitive(p) => p, @@ -224,7 +226,7 @@ impl ScalarValue { } /// Returns the decimal value, panicking if the value is not a - /// [`Decimal`][ScalarValue::Decimal]. + /// [`Decimal`](ScalarValue::Decimal). pub fn into_decimal(self) -> DecimalValue { match self { ScalarValue::Decimal(d) => d, @@ -232,7 +234,7 @@ impl ScalarValue { } } - /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`][ScalarValue::Utf8]. + /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`](ScalarValue::Utf8). pub fn into_utf8(self) -> BufferString { match self { ScalarValue::Utf8(s) => s, @@ -240,7 +242,7 @@ impl ScalarValue { } } - /// Returns the binary value, panicking if the value is not a [`Binary`][ScalarValue::Binary]. + /// Returns the binary value, panicking if the value is not a [`Binary`](ScalarValue::Binary). pub fn into_binary(self) -> ByteBuffer { match self { ScalarValue::Binary(b) => b, @@ -248,8 +250,8 @@ impl ScalarValue { } } - /// Returns the list elements, panicking if the value is not a [`List`][ScalarValue::List]. - pub fn into_list(self) -> Vec> { + /// Returns the list elements, panicking if the value is not a [`List`](ScalarValue::List). + pub fn into_struct_fields(self) -> Vec> { match self { ScalarValue::List(elements) => elements, _ => vortex_panic!("ScalarValue is not a List"), diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index b4cdf9a76e6..04e3cfd22e3 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -3,22 +3,30 @@ //! Protobuf serialization and deserialization for scalars. +use std::sync::Arc; + use num_traits::ToBytes; use num_traits::ToPrimitive; use prost::Message; use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; +use vortex_buffer::ByteBufferMut; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; use vortex_proto::scalar as pb; +use vortex_proto::scalar::ArrayValue; use vortex_proto::scalar::ListValue; use vortex_proto::scalar::scalar_value::Kind; use vortex_session::VortexSession; +use crate::ArrayContext; +use crate::ArrayRef; use crate::dtype::DType; +use crate::dtype::DecimalDType; use crate::dtype::PType; use crate::dtype::half::f16; use crate::dtype::i256; @@ -26,6 +34,9 @@ use crate::scalar::DecimalValue; use crate::scalar::PValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; +use crate::serde::ArrayParts; +use crate::serde::SerializeOptions; +use crate::vtable::ArrayId; //////////////////////////////////////////////////////////////////////////////////////////////////// // Serialize INTO proto. @@ -110,6 +121,34 @@ impl From<&ScalarValue> for pb::ScalarValue { kind: Some(Kind::ListValue(ListValue { values })), } } + ScalarValue::Array(array) => { + let ctx = ArrayContext::empty(); + + // Note that `ctx` interns an ID during serialization (through the &). + let serialized = array + .to_array() + .serialize(&ctx, &SerializeOptions::default()) + .vortex_expect("somehow unable to serialize value as array"); + + let encoding_ids = ctx.to_ids().into_iter().map(|id| id.to_string()).collect(); + + // Serialize the array buffers back-to-back. + let mut data = ByteBufferMut::empty(); + for buf in serialized { + data.extend_from_slice(buf.as_ref()); + } + let data = data.freeze().to_vec(); + + let array_value = ArrayValue { + row_count: array.len() as u64, + encoding_ids, + data, + }; + + pb::ScalarValue { + kind: Some(Kind::ArrayValue(array_value)), + } + } } } } @@ -255,6 +294,7 @@ impl ScalarValue { Kind::StringValue(s) => string_from_proto(s, dtype)?, Kind::BytesValue(b) => bytes_from_proto(b, dtype)?, Kind::ListValue(v) => list_from_proto(v, dtype, session)?, + Kind::ArrayValue(a) => array_from_proto(a, dtype, session)?, })) } } @@ -377,51 +417,101 @@ fn string_from_proto(s: &str, dtype: &DType) -> VortexResult { } } -/// Deserialize a [`ScalarValue`] from a protobuf bytes and a `DType`. +/// Deserialize a [`ScalarValue`] from a protobuf bytes and a [`DType`]. +/// +/// Handles all variable-size scalars, including: /// -/// Handles [`Utf8`](ScalarValue::Utf8), [`Binary`](ScalarValue::Binary), and -/// [`Decimal`](ScalarValue::Decimal) dtypes. +/// - `Utf8` -> `ScalarValue::Utf8` +/// - `Binary` -> `ScalarValue::Binary` +/// - `Decimal` -> `ScalarValue::Decimal` (Since decimal has different width representations) fn bytes_from_proto(bytes: &[u8], dtype: &DType) -> VortexResult { match dtype { DType::Utf8(_) => Ok(ScalarValue::Utf8(BufferString::try_from(bytes)?)), DType::Binary(_) => Ok(ScalarValue::Binary(ByteBuffer::copy_from(bytes))), - // TODO(connor): This is incorrect, we need to verify this matches the inner decimal_dtype. - DType::Decimal(..) => Ok(ScalarValue::Decimal(match bytes.len() { - 1 => DecimalValue::I8(bytes[0] as i8), - 2 => DecimalValue::I16(i16::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 4 => DecimalValue::I32(i32::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 8 => DecimalValue::I64(i64::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 16 => DecimalValue::I128(i128::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 32 => DecimalValue::I256(i256::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - l => vortex_bail!(Serde: "invalid decimal byte length: {l}"), - })), + DType::Decimal(decimal_dtype, _) => decimal_from_proto(bytes, decimal_dtype), + _ => vortex_bail!( + Serde: "expected Utf8, Binary, List, FSL, or Decimal dtype for BytesValue, got {dtype}" + ), + } +} + +/// Deserialize a [`ScalarValue::Decimal`] from a protobuf bytes. +fn decimal_from_proto(bytes: &[u8], _decimal_dtype: &DecimalDType) -> VortexResult { + let value = match bytes.len() { + 1 => DecimalValue::I8(bytes[0] as i8), + 2 => { + DecimalValue::I16(i16::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 4 => { + DecimalValue::I32(i32::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 8 => { + DecimalValue::I64(i64::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 16 => { + DecimalValue::I128(i128::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 32 => { + DecimalValue::I256(i256::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + l => vortex_bail!(Serde: "invalid decimal byte length: {l}"), + }; + + Ok(ScalarValue::Decimal(value)) +} + +/// The inner function for deserializing a [`ScalarValue::Array`] from a protobuf `ArrayValue`. +fn array_from_proto_inner( + array_value: &ArrayValue, + elem_dtype: &DType, + session: &VortexSession, +) -> VortexResult { + let array_len: usize = array_value.row_count.try_into().map_err( + |_| vortex_err!(Serde: "array row_count {} exceeds usize::MAX", array_value.row_count), + )?; + + // TODO(connor): Do we care about alignment? + // We don't care about alignment here since ArrayParts will handle it. + let parts = ArrayParts::try_from(array_value.data.as_slice())?; + + let encoding_ids = array_value + .encoding_ids + .iter() + .map(|id| ArrayId::new_arc(Arc::from(id.clone()))) + .collect::>(); + let ctx = ArrayContext::new(encoding_ids); + + parts.decode(elem_dtype, array_len, &ctx, session) +} + +/// Deserialize a [`ScalarValue::Array`] from a protobuf `ArrayValue`. +fn array_from_proto( + array_value: &ArrayValue, + dtype: &DType, + session: &VortexSession, +) -> VortexResult { + match dtype { + DType::List(elem_dtype, _) => { + let array = array_from_proto_inner(array_value, elem_dtype, session)?; + Ok(ScalarValue::Array(array)) + } + DType::FixedSizeList(elem_dtype, list_size, _) => { + let array = array_from_proto_inner(array_value, elem_dtype, session)?; + vortex_ensure_eq!(array.len(), *list_size as usize); + Ok(ScalarValue::Array(array)) + } _ => vortex_bail!( - Serde: "expected Utf8, Binary, or Decimal dtype for BytesValue, got {dtype}" + Serde: "expected Utf8, Binary, List, FSL, or Decimal dtype for BytesValue, got {dtype}" ), } } diff --git a/vortex-array/src/scalar/scalar_impl.rs b/vortex-array/src/scalar/scalar_impl.rs index c7a0e9b7999..fea04f67725 100644 --- a/vortex-array/src/scalar/scalar_impl.rs +++ b/vortex-array/src/scalar/scalar_impl.rs @@ -186,9 +186,11 @@ impl Scalar { DType::Decimal(..) => value.as_decimal().is_zero(), DType::Utf8(_) => value.as_utf8().is_empty(), DType::Binary(_) => value.as_binary().is_empty(), - DType::List(..) => value.as_list().is_empty(), - DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize, - DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(), + DType::List(..) => self.as_list().is_empty(), + DType::FixedSizeList(_, list_size, _) => self.as_list().len() == *list_size as usize, + DType::Struct(struct_fields, _) => { + value.as_struct_fields().len() == struct_fields.nfields() + } DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?, }; diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index 71bf4fcfc77..c633b498277 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -4,36 +4,61 @@ //! Core [`ScalarValue`] type definition. use std::cmp::Ordering; -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; use itertools::Itertools; use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_panic; +use crate::ArrayRef; use crate::dtype::DType; use crate::scalar::DecimalValue; use crate::scalar::PValue; -/// The value stored in a [`Scalar`][crate::scalar::Scalar]. +/// The value stored in a [`Scalar`](crate::scalar::Scalar). /// /// This enum represents the possible non-null values that can be stored in a scalar. When the /// scalar is null, the value is represented as `None` in the `Option` field. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// +/// We can think of this type loosely (heavy emphasis on "loosely") as the serialization / physical +/// type of scalars. +#[derive(Debug, Clone)] pub enum ScalarValue { /// A boolean value. Bool(bool), + /// A primitive numeric value. Primitive(PValue), + /// A decimal value. Decimal(DecimalValue), + /// A UTF-8 encoded string value. Utf8(BufferString), + /// A binary (byte array) value. Binary(ByteBuffer), + /// A list of potentially null scalar values. + /// + /// Previously, we used this to represent all of struct, list, and fixed-size list scalars, but + /// with the recent addition of the `ScalarValue::Array` variant below, this only stores struct + /// scalars (and any other list scalars written to files in the past for backcompat). List(Vec>), + + /// An [`ArrayRef`] representing a single list scalar of a `List` or `FixedSizeList` array. + /// + /// We serialize this by using `Array::serialize()` (see vortex-array/src/serde.rs) into + /// protobuf bytes. Note that because we require passing the length of the array to deserialize + /// with [`ArrayParts::decode()`](crate::serde::ArrayParts::decode), we store the array length + /// in the first 8 bytes. + Array(ArrayRef), } impl ScalarValue { @@ -102,25 +127,28 @@ impl ScalarValue { } }) } -} -impl PartialOrd for ScalarValue { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b), - (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b), - (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b), - (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), - (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), - (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b), - // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b), - _ => None, + // TODO(connor): Is there a more performant way we can do this? + /// Returns a `List` representation of this value, expanding `Array` variants as needed. + /// + /// If the value is already a `List`, it is cloned as-is. If it is an `Array`, each element is + /// extracted via [`scalar_at`](crate::Array::scalar_at) and collected into a `List`. + fn as_list_expanded(&self) -> VortexResult { + match self { + Self::List(_) => Ok(self.clone()), + Self::Array(array) => { + let values = (0..array.len()) + .map(|i| array.scalar_at(i).map(|scalar| scalar.value().cloned())) + .collect::>>()?; + Ok(Self::List(values)) + } + _ => vortex_bail!("Expected List or Array ScalarValue"), } } } -impl Display for ScalarValue { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +impl fmt::Display for ScalarValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ScalarValue::Bool(b) => write!(f, "{b}"), ScalarValue::Primitive(p) => write!(f, "{p}"), @@ -163,6 +191,91 @@ impl Display for ScalarValue { } write!(f, "]") } + ScalarValue::Array(_) => { + // We simply expand the value out into a list to display it. + let expanded = self.as_list_expanded().vortex_expect( + "something went wrong when expanding scalar value for displaying", + ); + + expanded.fmt(f) + } + } + } +} + +impl PartialEq for ScalarValue { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a == b, + (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a == b, + (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a == b, + (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a == b, + (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a == b, + (ScalarValue::List(a), ScalarValue::List(b)) => a == b, + // Two arrays: compare element-by-element without allocating intermediate Vecs. + (ScalarValue::Array(a), ScalarValue::Array(b)) => { + // TODO(connor): Is there a better way to do this? + a.len() == b.len() + && a.dtype() == b.dtype() + && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) + } + // Cross-variant comparisons between `Array` and `List`: expand the `Array` side. + (ScalarValue::Array(_), ScalarValue::List(_)) + | (ScalarValue::List(_), ScalarValue::Array(_)) => { + let lhs = self.as_list_expanded().vortex_expect( + "something went wrong when expanding lhs scalar value for comparison", + ); + let rhs = other.as_list_expanded().vortex_expect( + "something went wrong when expanding rhs scalar value for comparison", + ); + lhs == rhs + } + _ => false, + } + } +} + +impl Eq for ScalarValue {} + +impl PartialOrd for ScalarValue { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b), + (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b), + (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b), + (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), + (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), + (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b), + // Any combination involving `Array`: expand both sides and delegate. + (ScalarValue::Array(_), ScalarValue::List(_)) + | (ScalarValue::List(_), ScalarValue::Array(_)) + | (ScalarValue::Array(_), ScalarValue::Array(_)) => { + let lhs = self.as_list_expanded().vortex_expect( + "something went wrong when expanding lhs scalar value for comparison", + ); + let rhs = other.as_list_expanded().vortex_expect( + "something went wrong when expanding rhs scalar value for comparison", + ); + lhs.partial_cmp(&rhs) + } + _ => None, + } + } +} + +impl Hash for ScalarValue { + fn hash(&self, state: &mut H) { + match self { + ScalarValue::Bool(b) => b.hash(state), + ScalarValue::Primitive(p) => p.hash(state), + ScalarValue::Decimal(d) => d.hash(state), + ScalarValue::Utf8(s) => s.hash(state), + ScalarValue::Binary(b) => b.hash(state), + ScalarValue::List(l) => l.hash(state), + ScalarValue::Array(_) => self + .as_list_expanded() + .vortex_expect("something went wrong when expanding scalar value for hashing") + .hash(state), } } } diff --git a/vortex-array/src/scalar/typed_view/list.rs b/vortex-array/src/scalar/typed_view/list.rs index 6c4206f2a0e..32420309444 100644 --- a/vortex-array/src/scalar/typed_view/list.rs +++ b/vortex-array/src/scalar/typed_view/list.rs @@ -15,6 +15,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use crate::ArrayRef; use crate::dtype::DType; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -33,66 +34,13 @@ use crate::scalar::ScalarValue; pub struct ListScalar<'a> { /// The data type of this scalar. dtype: &'a DType, + /// A convenience field so that we do not have to unwrap and check the top-level `dtype` field /// every time we want to access this. element_dtype: &'a Arc, - /// The elements of the list. `None` if the entire list is null. - /// Each element is `Option` where `None` represents a null element within the - /// list. - elements: Option<&'a [Option]>, -} - -impl Display for ListScalar<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.elements { - None => write!(f, "null"), - Some(elems) => { - let type_str: &dyn Display = if let DType::FixedSizeList(_, size, _) = self.dtype { - &format!("fixed_size<{size}>") - } else { - &"" - }; - - write!( - f, - "{type_str}[{}]", - elems - .iter() - .map(|e| Scalar::try_new(self.element_dtype().clone(), e.clone()) - .vortex_expect("`ListScalar` is already a valid `Scalar`")) - .format(", ") - ) - } - } - } -} - -impl PartialEq for ListScalar<'_> { - fn eq(&self, other: &Self) -> bool { - self.dtype.eq_ignore_nullability(other.dtype) && self.elements() == other.elements() - } -} - -impl Eq for ListScalar<'_> {} - -/// Ord is not implemented since it's undefined for different element DTypes -impl PartialOrd for ListScalar<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - if !self - .element_dtype - .eq_ignore_nullability(other.element_dtype) - { - return None; - } - self.elements().partial_cmp(&other.elements()) - } -} -impl Hash for ListScalar<'_> { - fn hash(&self, state: &mut H) { - self.dtype.hash(state); - self.elements().hash(state); - } + /// The elements of this list scalar, or `None` if the scalar is null. + elements: Option>, } impl<'a> ListScalar<'a> { @@ -109,7 +57,11 @@ impl<'a> ListScalar<'a> { Ok(Self { dtype, element_dtype, - elements: value.map(|v| v.as_list()), + elements: value.map(|v| match v { + ScalarValue::List(elems) => Elements::Expanded(elems), + ScalarValue::Array(array) => Elements::Array(array), + _ => vortex_panic!("Expected List or Array ScalarValue for list dtype"), + }), }) } @@ -130,10 +82,7 @@ impl<'a> ListScalar<'a> { /// Returns true if the list has no elements or is null. #[inline] pub fn is_empty(&self) -> bool { - match self.elements.as_ref() { - None => true, - Some(l) => l.is_empty(), - } + self.elements.as_ref().map(|e| e.is_empty()).unwrap_or(true) } /// Returns true if the list is null. @@ -142,6 +91,17 @@ impl<'a> ListScalar<'a> { self.elements.is_none() } + /// Returns the underlying [`ArrayRef`] if this scalar is backed by an array. + /// + /// This is useful for bulk operations (e.g., in builders) that can avoid expanding the array + /// into individual scalars. + pub fn as_array(&self) -> Option<&'a ArrayRef> { + self.elements.and_then(|e| match e { + Elements::Expanded(_) => None, + Elements::Array(array) => Some(array), + }) + } + /// Returns the data type of the list's elements. pub fn element_dtype(&self) -> &DType { self.dtype @@ -152,24 +112,34 @@ impl<'a> ListScalar<'a> { /// Returns the element at the given index as a scalar. /// - /// Returns None if the list is null or the index is out of bounds. + /// Returns `None` if the list is null or the index is out of bounds. + /// + /// For `Array`-backed scalars, this calls [`Array::scalar_at()`](crate::Array::scalar_at) on + /// the underlying array which may involve per-element work. For `Expanded` list scalars, this + /// is a scalar clone. pub fn element(&self, idx: usize) -> Option { - self.elements.and_then(|l| l.get(idx)).map(|value| { + self.elements.and_then(|l| l.get_owned(idx)).map(|value| { // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), value.clone()) } + unsafe { Scalar::new_unchecked(self.element_dtype().clone(), value) } }) } /// Returns all elements in the list as a vector of scalars. /// - /// Returns None if the list is null. + /// Returns `None` if the list is null. + /// + /// This always materializes every element into a `Vec`. For `Array`-backed scalars this + /// requires calling [`Array::scalar_at()`](crate::Array::scalar_at) for each element. + /// + /// Prefer [`as_array()`](Self::as_array) when the underlying array can be used directly (e.g., + /// in builders). pub fn elements(&self) -> Option> { self.elements.map(|elems| { elems .iter() .map(|e| { // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), e.clone()) } + unsafe { Scalar::new_unchecked(self.element_dtype().clone(), e) } }) .collect_vec() }) @@ -213,7 +183,7 @@ impl<'a> ListScalar<'a> { .iter() .map(|element| { // Recursively cast the elements of the list. - Scalar::try_new(DType::clone(self.element_dtype), element.clone())? + Scalar::try_new(DType::clone(self.element_dtype), element)? .cast(target_element_dtype) .map(|x| x.into_value()) }) @@ -223,6 +193,153 @@ impl<'a> ListScalar<'a> { } } +impl Display for ListScalar<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.elements { + None => write!(f, "null"), + Some(elems) => { + let type_str: &dyn Display = if let DType::FixedSizeList(_, size, _) = self.dtype { + &format!("fixed_size<{size}>") + } else { + &"" + }; + + write!( + f, + "{type_str}[{}]", + elems + .iter() + .map(|e| Scalar::try_new(self.element_dtype().clone(), e) + .vortex_expect("`ListScalar` is already a valid `Scalar`")) + .format(", ") + ) + } + } + } +} + +impl PartialEq for ListScalar<'_> { + fn eq(&self, other: &Self) -> bool { + if !self.dtype.eq_ignore_nullability(other.dtype) { + return false; + } + + match (self.elements, other.elements) { + (None, None) => true, + (Some(a), Some(b)) => { + // TODO(connor): Is this the best we can do? + a.len() == b.len() && (0..a.len()).all(|i| a.get_owned(i) == b.get_owned(i)) + } + _ => false, + } + } +} + +impl Eq for ListScalar<'_> {} + +/// Ord is not implemented since it's undefined for different element DTypes. +impl PartialOrd for ListScalar<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + if !self + .element_dtype + .eq_ignore_nullability(other.element_dtype) + { + return None; + } + + self.elements().partial_cmp(&other.elements()) + } +} + +impl Hash for ListScalar<'_> { + fn hash(&self, state: &mut H) { + self.dtype.hash(state); + self.len().hash(state); + + if let Some(elems) = self.elements { + for i in 0..elems.len() { + elems.get_owned(i).hash(state); + } + } + } +} + +/// The elements of the list. +#[derive(Debug, Clone, Copy)] +enum Elements<'a> { + /// Each element is `Option` where `None` represents a null element within the + /// list. This used to be the only representation of lists (before we added an `Array` variant). + Expanded(&'a [Option]), + /// A list represented by an [`ArrayRef`]. + Array(&'a ArrayRef), +} + +impl<'a> Elements<'a> { + /// Returns the number of elements in the list. + fn len(&self) -> usize { + match self { + Elements::Expanded(elems) => elems.len(), + Elements::Array(array) => array.len(), + } + } + + /// Returns true if the list has no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns an iterator over the elements of the list. + fn iter(self) -> OwnedElementsIter<'a> { + OwnedElementsIter { + elements: self, + index: 0, + } + } + + /// Returns the element at the given index as an `Option`. + /// + /// Returns `None` if the index is out of bounds. Returns `Some(None)` if the element is null. + /// Returns `Some(Some(ScalarValue))` if the element is non-null. + fn get_owned(&self, idx: usize) -> Option> { + match self { + Elements::Expanded(elems) => elems.get(idx).cloned(), + Elements::Array(array) => { + if idx >= array.len() { + None + } else { + let scalar = array + .scalar_at(idx) + .vortex_expect("index already bounds-checked"); + Some(scalar.into_value()) + } + } + } + } +} + +/// An iterator over the elements of a list, yielding `Option` for each element. +struct OwnedElementsIter<'a> { + elements: Elements<'a>, + index: usize, +} + +impl Iterator for OwnedElementsIter<'_> { + type Item = Option; + + fn next(&mut self) -> Option { + let item = self.elements.get_owned(self.index)?; + self.index += 1; + Some(item) + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.elements.len() - self.index; + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for OwnedElementsIter<'_> {} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/vortex-array/src/scalar/typed_view/struct_.rs b/vortex-array/src/scalar/typed_view/struct_.rs index ce571350a5b..2dc08df6166 100644 --- a/vortex-array/src/scalar/typed_view/struct_.rs +++ b/vortex-array/src/scalar/typed_view/struct_.rs @@ -122,7 +122,7 @@ impl<'a> StructScalar<'a> { Ok(Self { dtype, - fields: value.map(|value| value.as_list()), + fields: value.map(|value| value.as_struct_fields()), }) } diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 3fed201ed93..22d3a4ae0dd 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -74,6 +74,13 @@ impl Scalar { ); } DType::List(elem_dtype, _) => { + if let ScalarValue::Array(array) = value { + // If the scalar value is an array, we just need to check that the array + // elements match the elem_dtype, so just check that the dtype is the same. + vortex_ensure_eq!(elem_dtype.as_ref(), array.dtype()); + return Ok(()); + } + let ScalarValue::List(elements) = value else { vortex_bail!("list dtype expected List value, got {value}"); }; @@ -84,6 +91,13 @@ impl Scalar { } } DType::FixedSizeList(elem_dtype, size, _) => { + if let ScalarValue::Array(array) = value { + // If the scalar value is an array, we just need to check that the array + // elements match the elem_dtype, so just check that the dtype is the same. + vortex_ensure_eq!(elem_dtype.as_ref(), array.dtype()); + return Ok(()); + } + let ScalarValue::List(elements) = value else { vortex_bail!("fixed-size list dtype expected List value, got {value}",); }; diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index e749e2696c3..da472c4c2c0 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -639,6 +639,32 @@ impl ArrayChildren for ArrayPartsChildren<'_> { } } +// TODO(connor): Do we need this? + +impl TryFrom<&[u8]> for ArrayParts { + type Error = VortexError; + + fn try_from(value: &[u8]) -> Result { + // The final 4 bytes contain the length of the flatbuffer. + if value.len() < 4 { + vortex_bail!("ArrayParts buffer is too short"); + } + + // TODO(connor): Do we care about alignment here? + + let fb_length = u32::try_from_le_bytes(&value[value.len() - 4..])? as usize; + if value.len() < 4 + fb_length { + vortex_bail!("ArrayParts buffer is too short for flatbuffer"); + } + + let fb_offset = value.len() - 4 - fb_length; + let array_tree = ByteBuffer::copy_from(&value[fb_offset..fb_offset + fb_length]); + let segment = BufferHandle::new_host(ByteBuffer::copy_from(&value[0..fb_offset])); + + Self::from_flatbuffer_and_segment(array_tree, segment) + } +} + impl TryFrom for ArrayParts { type Error = VortexError; diff --git a/vortex-ipc/src/messages/decoder.rs b/vortex-ipc/src/messages/decoder.rs index c61b546506d..9847efdc6f1 100644 --- a/vortex-ipc/src/messages/decoder.rs +++ b/vortex-ipc/src/messages/decoder.rs @@ -44,7 +44,7 @@ pub enum PollRead { Some(DecoderMessage), /// The decoder needs more data to make progress. /// - /// The inner value is the **total*k number of bytes the buffer should contain, not the + /// The inner value is the **total** number of bytes the buffer should contain, not the /// incremental amount needed. Callers should: /// /// 1. Resize the buffer to this length. diff --git a/vortex-proto/proto/scalar.proto b/vortex-proto/proto/scalar.proto index fcc31b25e3e..640b7ea5bd3 100644 --- a/vortex-proto/proto/scalar.proto +++ b/vortex-proto/proto/scalar.proto @@ -28,9 +28,16 @@ message ScalarValue { bytes bytes_value = 8; ListValue list_value = 9; uint64 f16_value = 10; + ArrayValue array_value = 11; } } message ListValue { repeated ScalarValue values = 1; } + +message ArrayValue { + uint64 row_count = 1; + repeated string encoding_ids = 2; + bytes data = 3; +} diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 303fa403401..be1dfde2dcb 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -1084,6 +1084,8 @@ pub mod vortex_proto::scalar::scalar_value pub enum vortex_proto::scalar::scalar_value::Kind +pub vortex_proto::scalar::scalar_value::Kind::ArrayValue(vortex_proto::scalar::ArrayValue) + pub vortex_proto::scalar::scalar_value::Kind::BoolValue(bool) pub vortex_proto::scalar::scalar_value::Kind::BytesValue(alloc::vec::Vec) @@ -1126,6 +1128,44 @@ pub fn vortex_proto::scalar::scalar_value::Kind::fmt(&self, f: &mut core::fmt::F impl core::marker::StructuralPartialEq for vortex_proto::scalar::scalar_value::Kind +pub struct vortex_proto::scalar::ArrayValue + +pub vortex_proto::scalar::ArrayValue::data: alloc::vec::Vec + +pub vortex_proto::scalar::ArrayValue::encoding_ids: alloc::vec::Vec + +pub vortex_proto::scalar::ArrayValue::row_count: u64 + +impl core::clone::Clone for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::clone(&self) -> vortex_proto::scalar::ArrayValue + +impl core::cmp::Eq for vortex_proto::scalar::ArrayValue + +impl core::cmp::PartialEq for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::eq(&self, other: &vortex_proto::scalar::ArrayValue) -> bool + +impl core::default::Default for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::ArrayValue + +impl prost::message::Message for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::clear(&mut self) + +pub fn vortex_proto::scalar::ArrayValue::encoded_len(&self) -> usize + pub struct vortex_proto::scalar::ListValue pub vortex_proto::scalar::ListValue::values: alloc::vec::Vec diff --git a/vortex-proto/src/generated/vortex.scalar.rs b/vortex-proto/src/generated/vortex.scalar.rs index 9f4e3047e54..41b6d6ab5a5 100644 --- a/vortex-proto/src/generated/vortex.scalar.rs +++ b/vortex-proto/src/generated/vortex.scalar.rs @@ -8,7 +8,7 @@ pub struct Scalar { } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarValue { - #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] pub kind: ::core::option::Option, } /// Nested message and enum types in `ScalarValue`. @@ -35,6 +35,8 @@ pub mod scalar_value { ListValue(super::ListValue), #[prost(uint64, tag = "10")] F16Value(u64), + #[prost(message, tag = "11")] + ArrayValue(super::ArrayValue), } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -42,3 +44,12 @@ pub struct ListValue { #[prost(message, repeated, tag = "1")] pub values: ::prost::alloc::vec::Vec, } +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ArrayValue { + #[prost(uint64, tag = "1")] + pub row_count: u64, + #[prost(string, repeated, tag = "2")] + pub encoding_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(bytes = "vec", tag = "3")] + pub data: ::prost::alloc::vec::Vec, +} From ab6709323da9a4efcdee25a27379ff97a1fbabaf Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 4 Mar 2026 10:08:48 -0500 Subject: [PATCH 2/5] make renames Signed-off-by: Connor Tsui --- vortex-array/src/scalar/arbitrary.rs | 6 +-- vortex-array/src/scalar/constructor.rs | 2 +- .../src/scalar/convert/into_scalar.rs | 2 +- vortex-array/src/scalar/downcast.rs | 4 +- vortex-array/src/scalar/proto.rs | 14 ++--- vortex-array/src/scalar/scalar_value.rs | 34 ++++++------- vortex-array/src/scalar/tests/casting.rs | 6 +-- vortex-array/src/scalar/tests/nested.rs | 4 +- vortex-array/src/scalar/tests/nullability.rs | 6 +-- vortex-array/src/scalar/typed_view/list.rs | 4 +- vortex-array/src/scalar/typed_view/struct_.rs | 8 +-- vortex-array/src/scalar/validate.rs | 6 +-- vortex-proto/proto/scalar.proto | 51 ++++++++++++++++--- vortex-proto/src/generated/vortex.scalar.rs | 45 ++++++++++++++-- 14 files changed, 135 insertions(+), 57 deletions(-) diff --git a/vortex-array/src/scalar/arbitrary.rs b/vortex-array/src/scalar/arbitrary.rs index 342d4fa59b0..b1eaa5a3f83 100644 --- a/vortex-array/src/scalar/arbitrary.rs +++ b/vortex-array/src/scalar/arbitrary.rs @@ -66,7 +66,7 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { .vortex_expect("unable to construct random `Scalar`_"), DType::Struct(sdt, _) => Scalar::try_new( dtype.clone(), - Some(ScalarValue::List( + Some(ScalarValue::Struct( sdt.fields() .map(|d| random_scalar(u, &d).map(|s| s.into_value())) .collect::>>()?, @@ -75,7 +75,7 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { .vortex_expect("unable to construct random `Scalar`_"), DType::List(edt, _) => Scalar::try_new( dtype.clone(), - Some(ScalarValue::List( + Some(ScalarValue::Struct( iter::from_fn(|| { // Generate elements with 1/4 probability. u.arbitrary() @@ -88,7 +88,7 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { .vortex_expect("unable to construct random `Scalar`_"), DType::FixedSizeList(edt, size, _) => Scalar::try_new( dtype.clone(), - Some(ScalarValue::List( + Some(ScalarValue::Struct( (0..*size) .map(|_| random_scalar(u, edt).map(|s| s.into_value())) .collect::>>()?, diff --git a/vortex-array/src/scalar/constructor.rs b/vortex-array/src/scalar/constructor.rs index d673707337b..bbcc33210c5 100644 --- a/vortex-array/src/scalar/constructor.rs +++ b/vortex-array/src/scalar/constructor.rs @@ -195,7 +195,7 @@ impl Scalar { ListKind::FixedSize => DType::FixedSizeList(element_dtype, size, nullability), }; - Self::try_new(dtype, Some(ScalarValue::List(children))) + Self::try_new(dtype, Some(ScalarValue::Struct(children))) .vortex_expect("unable to construct a list `Scalar`") } diff --git a/vortex-array/src/scalar/convert/into_scalar.rs b/vortex-array/src/scalar/convert/into_scalar.rs index ac804c06dde..869075f1d69 100644 --- a/vortex-array/src/scalar/convert/into_scalar.rs +++ b/vortex-array/src/scalar/convert/into_scalar.rs @@ -82,7 +82,7 @@ where Scalar: From, { fn from(vec: Vec) -> Self { - ScalarValue::List( + ScalarValue::Struct( vec.into_iter() .map(|elem| Scalar::from(elem).into_value()) .collect(), diff --git a/vortex-array/src/scalar/downcast.rs b/vortex-array/src/scalar/downcast.rs index 92ff3a7afc2..6b788d81ffc 100644 --- a/vortex-array/src/scalar/downcast.rs +++ b/vortex-array/src/scalar/downcast.rs @@ -203,7 +203,7 @@ impl ScalarValue { /// [`List`](ScalarValue::List). pub fn as_struct_fields(&self) -> &[Option] { match self { - ScalarValue::List(elements) => elements, + ScalarValue::Struct(elements) => elements, _ => vortex_panic!("ScalarValue is not a List"), } } @@ -253,7 +253,7 @@ impl ScalarValue { /// Returns the list elements, panicking if the value is not a [`List`](ScalarValue::List). pub fn into_struct_fields(self) -> Vec> { match self { - ScalarValue::List(elements) => elements, + ScalarValue::Struct(elements) => elements, _ => vortex_panic!("ScalarValue is not a List"), } } diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index 04e3cfd22e3..ba64c800156 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -19,7 +19,7 @@ use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; use vortex_proto::scalar as pb; use vortex_proto::scalar::ArrayValue; -use vortex_proto::scalar::ListValue; +use vortex_proto::scalar::StructValue; use vortex_proto::scalar::scalar_value::Kind; use vortex_session::VortexSession; @@ -112,13 +112,13 @@ impl From<&ScalarValue> for pb::ScalarValue { ScalarValue::Binary(v) => pb::ScalarValue { kind: Some(Kind::BytesValue(v.to_vec())), }, - ScalarValue::List(v) => { + ScalarValue::Struct(v) => { let mut values = Vec::with_capacity(v.len()); for elem in v.iter() { values.push(ScalarValue::to_proto(elem.as_ref())); } pb::ScalarValue { - kind: Some(Kind::ListValue(ListValue { values })), + kind: Some(Kind::StructValue(StructValue { values })), } } ScalarValue::Array(array) => { @@ -293,7 +293,7 @@ impl ScalarValue { Kind::F64Value(v) => f64_from_proto(*v, dtype)?, Kind::StringValue(s) => string_from_proto(s, dtype)?, Kind::BytesValue(b) => bytes_from_proto(b, dtype)?, - Kind::ListValue(v) => list_from_proto(v, dtype, session)?, + Kind::StructValue(v) => list_from_proto(v, dtype, session)?, Kind::ArrayValue(a) => array_from_proto(a, dtype, session)?, })) } @@ -518,7 +518,7 @@ fn array_from_proto( /// Deserialize a [`ScalarValue::List`] from a protobuf `ListValue`. fn list_from_proto( - v: &ListValue, + v: &StructValue, dtype: &DType, session: &VortexSession, ) -> VortexResult { @@ -535,7 +535,7 @@ fn list_from_proto( )?); } - Ok(ScalarValue::List(values)) + Ok(ScalarValue::Struct(values)) } #[cfg(test)] @@ -612,7 +612,7 @@ mod tests { Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![ + Some(ScalarValue::Struct(vec![ Some(ScalarValue::Primitive(42i32.into())), Some(ScalarValue::Primitive(43i32.into())), ])), diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index c633b498277..26afd2d1e4b 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -50,7 +50,7 @@ pub enum ScalarValue { /// Previously, we used this to represent all of struct, list, and fixed-size list scalars, but /// with the recent addition of the `ScalarValue::Array` variant below, this only stores struct /// scalars (and any other list scalars written to files in the past for backcompat). - List(Vec>), + Struct(Vec>), /// An [`ArrayRef`] representing a single list scalar of a `List` or `FixedSizeList` array. /// @@ -71,17 +71,17 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::List(vec![]), + DType::List(..) => Self::Struct(vec![]), DType::FixedSizeList(edt, size, _) => { let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect(); - Self::List(elements) + Self::Struct(elements) } DType::Struct(fields, _) => { let field_values = fields .fields() .map(|f| Some(Self::zero_value(&f))) .collect(); - Self::List(field_values) + Self::Struct(field_values) } DType::Extension(ext_dtype) => { // Since we have no way to define a "zero" extension value (since we have no idea @@ -110,14 +110,14 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::List(vec![]), + DType::List(..) => Self::Struct(vec![]), DType::FixedSizeList(edt, size, _) => { let elements = (0..*size).map(|_| Self::default_value(edt)).collect(); - Self::List(elements) + Self::Struct(elements) } DType::Struct(fields, _) => { let field_values = fields.fields().map(|f| Self::default_value(&f)).collect(); - Self::List(field_values) + Self::Struct(field_values) } DType::Extension(ext_dtype) => { // Since we have no way to define a "default" extension value (since we have no idea @@ -135,12 +135,12 @@ impl ScalarValue { /// extracted via [`scalar_at`](crate::Array::scalar_at) and collected into a `List`. fn as_list_expanded(&self) -> VortexResult { match self { - Self::List(_) => Ok(self.clone()), + Self::Struct(_) => Ok(self.clone()), Self::Array(array) => { let values = (0..array.len()) .map(|i| array.scalar_at(i).map(|scalar| scalar.value().cloned())) .collect::>>()?; - Ok(Self::List(values)) + Ok(Self::Struct(values)) } _ => vortex_bail!("Expected List or Array ScalarValue"), } @@ -178,7 +178,7 @@ impl fmt::Display for ScalarValue { write!(f, "{}", to_hex(b)) } } - ScalarValue::List(elements) => { + ScalarValue::Struct(elements) => { write!(f, "[")?; for (i, element) in elements.iter().enumerate() { if i > 0 { @@ -211,7 +211,7 @@ impl PartialEq for ScalarValue { (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a == b, (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a == b, (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a == b, - (ScalarValue::List(a), ScalarValue::List(b)) => a == b, + (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a == b, // Two arrays: compare element-by-element without allocating intermediate Vecs. (ScalarValue::Array(a), ScalarValue::Array(b)) => { // TODO(connor): Is there a better way to do this? @@ -220,8 +220,8 @@ impl PartialEq for ScalarValue { && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) } // Cross-variant comparisons between `Array` and `List`: expand the `Array` side. - (ScalarValue::Array(_), ScalarValue::List(_)) - | (ScalarValue::List(_), ScalarValue::Array(_)) => { + (ScalarValue::Array(_), ScalarValue::Struct(_)) + | (ScalarValue::Struct(_), ScalarValue::Array(_)) => { let lhs = self.as_list_expanded().vortex_expect( "something went wrong when expanding lhs scalar value for comparison", ); @@ -245,10 +245,10 @@ impl PartialOrd for ScalarValue { (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b), (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), - (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b), + (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a.partial_cmp(b), // Any combination involving `Array`: expand both sides and delegate. - (ScalarValue::Array(_), ScalarValue::List(_)) - | (ScalarValue::List(_), ScalarValue::Array(_)) + (ScalarValue::Array(_), ScalarValue::Struct(_)) + | (ScalarValue::Struct(_), ScalarValue::Array(_)) | (ScalarValue::Array(_), ScalarValue::Array(_)) => { let lhs = self.as_list_expanded().vortex_expect( "something went wrong when expanding lhs scalar value for comparison", @@ -271,7 +271,7 @@ impl Hash for ScalarValue { ScalarValue::Decimal(d) => d.hash(state), ScalarValue::Utf8(s) => s.hash(state), ScalarValue::Binary(b) => b.hash(state), - ScalarValue::List(l) => l.hash(state), + ScalarValue::Struct(l) => l.hash(state), ScalarValue::Array(_) => self .as_list_expanded() .vortex_expect("something went wrong when expanding scalar value for hashing") diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 72b44a5ed55..0317e7f6985 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -158,7 +158,7 @@ mod tests { Some(ScalarValue::Primitive(PValue::F32(f32_value))), ]; - let scalar = Scalar::new(struct_dtype, Some(ScalarValue::List(field_values))); + let scalar = Scalar::new(struct_dtype, Some(ScalarValue::Struct(field_values))); let struct_scalar = scalar.as_struct(); let fields: Vec<_> = (0..3) @@ -215,7 +215,7 @@ mod tests { ))), ]; - let scalar = Scalar::new(list_dtype, Some(ScalarValue::List(elements))); + let scalar = Scalar::new(list_dtype, Some(ScalarValue::Struct(elements))); let list_scalar = scalar.as_list(); let elements = list_scalar.elements().unwrap(); @@ -371,7 +371,7 @@ mod tests { let scalar = Scalar::new( DType::Extension(ext_dtype.erased()), - Some(ScalarValue::List(field_values)), + Some(ScalarValue::Struct(field_values)), ); // Verify the struct field was coerced diff --git a/vortex-array/src/scalar/tests/nested.rs b/vortex-array/src/scalar/tests/nested.rs index c4b28418c36..d4df124b868 100644 --- a/vortex-array/src/scalar/tests/nested.rs +++ b/vortex-array/src/scalar/tests/nested.rs @@ -311,7 +311,7 @@ mod tests { Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![ + Some(ScalarValue::Struct(vec![ Some(ScalarValue::Primitive(PValue::U16(6))), Some(ScalarValue::Primitive(PValue::U16(100))), ])), @@ -350,7 +350,7 @@ mod tests { Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![ + Some(ScalarValue::Struct(vec![ Some(ScalarValue::Primitive(PValue::U16(100))), Some(ScalarValue::Primitive(PValue::U16(256))), // Too large for U8 Some(ScalarValue::Primitive(PValue::U16(1000))), // Too large for U8 diff --git a/vortex-array/src/scalar/tests/nullability.rs b/vortex-array/src/scalar/tests/nullability.rs index d70796d20ce..cb26bc94a19 100644 --- a/vortex-array/src/scalar/tests/nullability.rs +++ b/vortex-array/src/scalar/tests/nullability.rs @@ -54,7 +54,7 @@ mod tests { Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![Some(ScalarValue::Primitive( + Some(ScalarValue::Struct(vec![Some(ScalarValue::Primitive( PValue::U16(6), ))])), ); @@ -97,7 +97,7 @@ mod tests { Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![ + Some(ScalarValue::Struct(vec![ Some(ScalarValue::Primitive(PValue::U16(6))), None, Some(ScalarValue::Primitive(PValue::U16(10))), @@ -200,7 +200,7 @@ mod tests { Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), Nullability::Nullable, ), - Some(ScalarValue::List(vec![ + Some(ScalarValue::Struct(vec![ Some(ScalarValue::Primitive(PValue::U16(6))), None, ])), diff --git a/vortex-array/src/scalar/typed_view/list.rs b/vortex-array/src/scalar/typed_view/list.rs index 32420309444..52e14581ec6 100644 --- a/vortex-array/src/scalar/typed_view/list.rs +++ b/vortex-array/src/scalar/typed_view/list.rs @@ -58,7 +58,7 @@ impl<'a> ListScalar<'a> { dtype, element_dtype, elements: value.map(|v| match v { - ScalarValue::List(elems) => Elements::Expanded(elems), + ScalarValue::Struct(elems) => Elements::Expanded(elems), ScalarValue::Array(array) => Elements::Array(array), _ => vortex_panic!("Expected List or Array ScalarValue for list dtype"), }), @@ -177,7 +177,7 @@ impl<'a> ListScalar<'a> { Scalar::try_new( dtype.clone(), - Some(ScalarValue::List( + Some(ScalarValue::Struct( self.elements .ok_or_else(|| vortex_err!("nullness should be handled in Scalar::cast"))? .iter() diff --git a/vortex-array/src/scalar/typed_view/struct_.rs b/vortex-array/src/scalar/typed_view/struct_.rs index 2dc08df6166..f9d2f38c18f 100644 --- a/vortex-array/src/scalar/typed_view/struct_.rs +++ b/vortex-array/src/scalar/typed_view/struct_.rs @@ -237,7 +237,7 @@ impl<'a> StructScalar<'a> { .map(|s| s.into_value()) }) .collect::>>()?; - Scalar::try_new(dtype.clone(), Some(ScalarValue::List(fields))) + Scalar::try_new(dtype.clone(), Some(ScalarValue::Struct(fields))) } else { Ok(Scalar::null(dtype.clone())) } @@ -262,7 +262,7 @@ impl<'a> StructScalar<'a> { return Ok(Scalar::null(projected_dtype)); }; - let new_fields = ScalarValue::List( + let new_fields = ScalarValue::Struct( projection .iter() .map(|name| { @@ -307,7 +307,7 @@ impl Scalar { } let value_children: Vec<_> = children.into_iter().map(|x| x.into_value()).collect(); - Self::try_new(dtype, Some(ScalarValue::List(value_children))) + Self::try_new(dtype, Some(ScalarValue::Struct(value_children))) .vortex_expect("unable to construct a struct `Scalar`") } @@ -324,7 +324,7 @@ impl Scalar { children: impl IntoIterator, ) -> Self { let value_children: Vec<_> = children.into_iter().map(|s| s.into_value()).collect(); - Self::try_new(dtype, Some(ScalarValue::List(value_children))) + Self::try_new(dtype, Some(ScalarValue::Struct(value_children))) .vortex_expect("unable to construct a struct `Scalar`") } } diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 22d3a4ae0dd..2ef80a71719 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -81,7 +81,7 @@ impl Scalar { return Ok(()); } - let ScalarValue::List(elements) = value else { + let ScalarValue::Struct(elements) = value else { vortex_bail!("list dtype expected List value, got {value}"); }; @@ -98,7 +98,7 @@ impl Scalar { return Ok(()); } - let ScalarValue::List(elements) = value else { + let ScalarValue::Struct(elements) = value else { vortex_bail!("fixed-size list dtype expected List value, got {value}",); }; @@ -116,7 +116,7 @@ impl Scalar { } } DType::Struct(fields, _) => { - let ScalarValue::List(values) = value else { + let ScalarValue::Struct(values) = value else { vortex_bail!("struct dtype expected List value, got {value}"); }; diff --git a/vortex-proto/proto/scalar.proto b/vortex-proto/proto/scalar.proto index 640b7ea5bd3..26ef773b31a 100644 --- a/vortex-proto/proto/scalar.proto +++ b/vortex-proto/proto/scalar.proto @@ -5,17 +5,37 @@ syntax = "proto3"; package vortex.scalar; -option java_package = "dev.vortex.proto"; -option java_outer_classname = "ScalarProtos"; - import "dtype.proto"; import "google/protobuf/struct.proto"; +option java_outer_classname = "ScalarProtos"; +option java_package = "dev.vortex.proto"; + message Scalar { + // The logical data type of this scalar. vortex.dtype.DType dtype = 1; + // The value payload, or absent if the scalar is null (only possible when the dtype is + // nullable). ScalarValue value = 2; } +// The value payload of a `Scalar`. +// +// Each variant corresponds to a physical representation. The correct interpretation depends on the +// accompanying `DType` in the parent `Scalar` message. For example, an `int64_value` may represent +// an i8, i16, i32, or i64 depending on the dtype's `PType`. Deserialization of a `ScalarValue` +// only makes sense when paired with a `DType`. +// +// Primitive integer types are widened to 64-bit (`sint64` for signed, `uint64` for unsigned) since +// protobuf does not have narrower integer fields. The original width is recovered using the dtype +// during deserialization. +// +// The order of this enum is the order in which these types were added. We must maintain it to +// preserve backwards compatibility. +// +// Finally, note that this `ScalarValue` in **not** 1-to-1 with the Vortex `ScalarValue` enum, as +// this protobuf type internally tracks if the value is null. The Vortex `ScalarValue` enum in +// non-nullable, and we reperent a nullable Vortex scalar value as `Option` instead. message ScalarValue { oneof kind { google.protobuf.NullValue null_value = 1; @@ -26,18 +46,37 @@ message ScalarValue { double f64_value = 6; string string_value = 7; bytes bytes_value = 8; - ListValue list_value = 9; + StructValue struct_value = 9; uint64 f16_value = 10; ArrayValue array_value = 11; } } -message ListValue { - repeated ScalarValue values = 1; +// A struct scalar value that packs several heterogeneous field values together. +// +// Each entry in `fields` corresponds to a field of the parent struct dtype, in order. +// +// NB: This message was previously called `ListValue` and was used for both struct and list / +// fixed-size list scalars. It has since been renamed, and list scalars are now serialized using +// `ArrayValue` instead. +// However, old files may still contain `StructValue` (tag 9) payloads for list dtypes. To maintain +// backwards compatibility, deserializers must check the accompanying dtype and, if it is a list or +// fixed-size list, reconstruct an array from the individual scalar elements. +message StructValue { + // The field values, one per struct field, in dtype field order. + repeated ScalarValue fields = 1; } +// A serialized Vortex array, used to represent a single list or fixed-size list scalar. +// +// The array is serialized using `Array::serialize()` and can be reconstructed with +// `ArrayParts::decode()`. The `encoding_ids` list provides the encoding table needed for +// deserialization, and `data` contains the concatenated array buffers. message ArrayValue { + // The number of elements (rows) in the serialized array. uint64 row_count = 1; + // Encoding IDs that form the encoding table for deserialization. repeated string encoding_ids = 2; + // The concatenated serialized array buffers. bytes data = 3; } diff --git a/vortex-proto/src/generated/vortex.scalar.rs b/vortex-proto/src/generated/vortex.scalar.rs index 41b6d6ab5a5..dbdbf272a67 100644 --- a/vortex-proto/src/generated/vortex.scalar.rs +++ b/vortex-proto/src/generated/vortex.scalar.rs @@ -1,11 +1,31 @@ // This file is @generated by prost-build. #[derive(Clone, PartialEq, ::prost::Message)] pub struct Scalar { + /// The logical data type of this scalar. #[prost(message, optional, tag = "1")] pub dtype: ::core::option::Option, + /// The value payload, or absent if the scalar is null (only possible when the dtype is + /// nullable). #[prost(message, optional, tag = "2")] pub value: ::core::option::Option, } +/// The value payload of a `Scalar`. +/// +/// Each variant corresponds to a physical representation. The correct interpretation depends on the +/// accompanying `DType` in the parent `Scalar` message. For example, an `int64_value` may represent +/// an i8, i16, i32, or i64 depending on the dtype's `PType`. Deserialization of a `ScalarValue` +/// only makes sense when paired with a `DType`. +/// +/// Primitive integer types are widened to 64-bit (`sint64` for signed, `uint64` for unsigned) since +/// protobuf does not have narrower integer fields. The original width is recovered using the dtype +/// during deserialization. +/// +/// The order of this enum is the order in which these types were added. We must maintain it to +/// preserve backwards compatibility. +/// +/// Finally, note that this `ScalarValue` in **not** 1-to-1 with the Vortex `ScalarValue` enum, as +/// this protobuf type internally tracks if the value is null. The Vortex `ScalarValue` enum in +/// non-nullable, and we reperent a nullable Vortex scalar value as `Option` instead. #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarValue { #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] @@ -32,24 +52,43 @@ pub mod scalar_value { #[prost(bytes, tag = "8")] BytesValue(::prost::alloc::vec::Vec), #[prost(message, tag = "9")] - ListValue(super::ListValue), + StructValue(super::StructValue), #[prost(uint64, tag = "10")] F16Value(u64), #[prost(message, tag = "11")] ArrayValue(super::ArrayValue), } } +/// A struct scalar value that packs several heterogeneous field values together. +/// +/// Each entry in `fields` corresponds to a field of the parent struct dtype, in order. +/// +/// NB: This message was previously called `ListValue` and was used for both struct and list / +/// fixed-size list scalars. It has since been renamed, and list scalars are now serialized using +/// `ArrayValue` instead. +/// However, old files may still contain `StructValue` (tag 9) payloads for list dtypes. To maintain +/// backwards compatibility, deserializers must check the accompanying dtype and, if it is a list or +/// fixed-size list, reconstruct an array from the individual scalar elements. #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListValue { +pub struct StructValue { + /// The field values, one per struct field, in dtype field order. #[prost(message, repeated, tag = "1")] - pub values: ::prost::alloc::vec::Vec, + pub fields: ::prost::alloc::vec::Vec, } +/// A serialized Vortex array, used to represent a single list or fixed-size list scalar. +/// +/// The array is serialized using `Array::serialize()` and can be reconstructed with +/// `ArrayParts::decode()`. The `encoding_ids` list provides the encoding table needed for +/// deserialization, and `data` contains the concatenated array buffers. #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct ArrayValue { + /// The number of elements (rows) in the serialized array. #[prost(uint64, tag = "1")] pub row_count: u64, + /// Encoding IDs that form the encoding table for deserialization. #[prost(string, repeated, tag = "2")] pub encoding_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// The concatenated serialized array buffers. #[prost(bytes = "vec", tag = "3")] pub data: ::prost::alloc::vec::Vec, } From 9d6bd808d47c734a971baf23b31d06a16fba2a59 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 4 Mar 2026 11:51:20 -0500 Subject: [PATCH 3/5] only use array for list scalars Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 10 +- vortex-array/src/builders/mod.rs | 13 + vortex-array/src/scalar/arbitrary.rs | 49 ++-- vortex-array/src/scalar/cast.rs | 9 +- vortex-array/src/scalar/constructor.rs | 31 ++- .../src/scalar/convert/into_scalar.rs | 9 +- vortex-array/src/scalar/downcast.rs | 28 +- vortex-array/src/scalar/proto.rs | 157 +++++++++--- vortex-array/src/scalar/scalar_value.rs | 123 +++++---- vortex-array/src/scalar/tests/casting.rs | 21 +- vortex-array/src/scalar/tests/nested.rs | 36 ++- vortex-array/src/scalar/tests/nullability.rs | 50 ++-- vortex-array/src/scalar/typed_view/list.rs | 241 +++++++----------- vortex-array/src/scalar/validate.rs | 52 ++-- vortex-proto/public-api.lock | 60 ++--- 15 files changed, 466 insertions(+), 423 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6e7abdd6420..6cb4075f8e5 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -5596,6 +5596,8 @@ pub fn vortex_array::builders::PrimitiveBuilder::set_validity(&mut self, vali pub unsafe fn vortex_array::builders::PrimitiveBuilder::set_validity_unchecked(&mut self, validity: vortex_mask::Mask) +pub fn vortex_array::builders::build_array_from_scalars(dtype: &vortex_array::dtype::DType, scalars: &[vortex_array::scalar::Scalar]) -> vortex_array::ArrayRef + pub fn vortex_array::builders::builder_with_capacity(dtype: &vortex_array::dtype::DType, capacity: usize) -> alloc::boxed::Box pub mod vortex_array::builtins @@ -11510,10 +11512,10 @@ pub vortex_array::scalar::ScalarValue::Bool(bool) pub vortex_array::scalar::ScalarValue::Decimal(vortex_array::scalar::DecimalValue) -pub vortex_array::scalar::ScalarValue::List(alloc::vec::Vec>) - pub vortex_array::scalar::ScalarValue::Primitive(vortex_array::scalar::PValue) +pub vortex_array::scalar::ScalarValue::Struct(alloc::vec::Vec>) + pub vortex_array::scalar::ScalarValue::Utf8(vortex_buffer::string::BufferString) impl vortex_array::scalar::ScalarValue @@ -11524,6 +11526,8 @@ pub fn vortex_array::scalar::ScalarValue::as_bool(&self) -> bool pub fn vortex_array::scalar::ScalarValue::as_decimal(&self) -> &vortex_array::scalar::DecimalValue +pub fn vortex_array::scalar::ScalarValue::as_list(&self) -> &vortex_array::ArrayRef + pub fn vortex_array::scalar::ScalarValue::as_primitive(&self) -> &vortex_array::scalar::PValue pub fn vortex_array::scalar::ScalarValue::as_struct_fields(&self) -> &[core::option::Option] @@ -11536,6 +11540,8 @@ pub fn vortex_array::scalar::ScalarValue::into_bool(self) -> bool pub fn vortex_array::scalar::ScalarValue::into_decimal(self) -> vortex_array::scalar::DecimalValue +pub fn vortex_array::scalar::ScalarValue::into_list(self) -> vortex_array::ArrayRef + pub fn vortex_array::scalar::ScalarValue::into_primitive(self) -> vortex_array::scalar::PValue pub fn vortex_array::scalar::ScalarValue::into_struct_fields(self) -> alloc::vec::Vec> diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index 489c714730c..3ceada928ea 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -287,3 +287,16 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box ArrayRef { + let mut builder = builder_with_capacity(dtype, scalars.len()); + + for scalar in scalars { + builder + .append_scalar(scalar) + .vortex_expect("failed to append scalar to array builder"); + } + + builder.finish() +} diff --git a/vortex-array/src/scalar/arbitrary.rs b/vortex-array/src/scalar/arbitrary.rs index b1eaa5a3f83..a047dc217e9 100644 --- a/vortex-array/src/scalar/arbitrary.rs +++ b/vortex-array/src/scalar/arbitrary.rs @@ -14,6 +14,7 @@ use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativeDecimalType; @@ -73,28 +74,32 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { )), ) .vortex_expect("unable to construct random `Scalar`_"), - DType::List(edt, _) => Scalar::try_new( - dtype.clone(), - Some(ScalarValue::Struct( - iter::from_fn(|| { - // Generate elements with 1/4 probability. - u.arbitrary() - .unwrap_or(false) - .then(|| random_scalar(u, edt).map(|s| s.into_value())) - }) - .collect::>>()?, - )), - ) - .vortex_expect("unable to construct random `Scalar`_"), - DType::FixedSizeList(edt, size, _) => Scalar::try_new( - dtype.clone(), - Some(ScalarValue::Struct( - (0..*size) - .map(|_| random_scalar(u, edt).map(|s| s.into_value())) - .collect::>>()?, - )), - ) - .vortex_expect("unable to construct random `Scalar`_"), + DType::List(edt, _) => { + let elements: Vec = iter::from_fn(|| { + // Generate elements with 1/4 probability. + u.arbitrary() + .unwrap_or(false) + .then(|| random_scalar(u, edt)) + }) + .collect::>>()?; + + Scalar::try_new( + dtype.clone(), + Some(ScalarValue::Array(build_array_from_scalars(edt, &elements))), + ) + .vortex_expect("unable to construct random `Scalar`_") + } + DType::FixedSizeList(edt, size, _) => { + let elements: Vec = (0..*size) + .map(|_| random_scalar(u, edt)) + .collect::>>()?; + + Scalar::try_new( + dtype.clone(), + Some(ScalarValue::Array(build_array_from_scalars(edt, &elements))), + ) + .vortex_expect("unable to construct random `Scalar`_") + } DType::Extension(..) => { unreachable!("Can't yet generate arbitrary scalars for ext dtype") } diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 82986cd278d..22af0dc0b8f 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -23,10 +23,11 @@ impl Scalar { return Ok(self.clone()); } - // Check for solely nullability casting. - if self.dtype().eq_ignore_nullability(target_dtype) { - // Cast from non-nullable to nullable or vice versa. - // The `try_new` will handle nullability checks. + // TODO(connor): Is this correct? + // Check for solely top-level nullability casting. We compare after adjusting only the + // top-level nullability (not recursive) so that nested types like List/FSL/Struct still + // go through the full cast path when their element or field nullability differs. + if self.dtype().with_nullability(target_dtype.nullability()) == *target_dtype { return Scalar::try_new(target_dtype.clone(), self.value().cloned()); } diff --git a/vortex-array/src/scalar/constructor.rs b/vortex-array/src/scalar/constructor.rs index bbcc33210c5..97039cdc6a0 100644 --- a/vortex-array/src/scalar/constructor.rs +++ b/vortex-array/src/scalar/constructor.rs @@ -11,6 +11,7 @@ use vortex_error::VortexExpect; use vortex_error::vortex_panic; use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativePType; @@ -130,6 +131,8 @@ impl Scalar { unsafe { Scalar::new_unchecked(parent_dtype, Some(scalar_value)) } } + // TODO(connor): These methods need to be deprecated!!! + /// Creates a new list scalar with the given element type and children. /// /// # Panics @@ -172,30 +175,30 @@ impl Scalar { ) -> Self { let element_dtype = element_dtype.into(); - let children: Vec> = children - .into_iter() - .map(|child| { - if child.dtype() != &*element_dtype { - vortex_panic!( - "tried to create list of {} with values of type {}", - element_dtype, - child.dtype() - ); - } - child.into_value() - }) - .collect(); + // Validate all children have the correct dtype. + for child in &children { + if child.dtype() != &*element_dtype { + vortex_panic!( + "tried to create list of {} with values of type {}", + element_dtype, + child.dtype() + ); + } + } + let size: u32 = children .len() .try_into() .vortex_expect("tried to create a list that was too large"); + let array = build_array_from_scalars(&element_dtype, &children); + let dtype = match list_kind { ListKind::Variable => DType::List(element_dtype, nullability), ListKind::FixedSize => DType::FixedSizeList(element_dtype, size, nullability), }; - Self::try_new(dtype, Some(ScalarValue::Struct(children))) + Self::try_new(dtype, Some(ScalarValue::Array(array))) .vortex_expect("unable to construct a list `Scalar`") } diff --git a/vortex-array/src/scalar/convert/into_scalar.rs b/vortex-array/src/scalar/convert/into_scalar.rs index 869075f1d69..5c3e335358c 100644 --- a/vortex-array/src/scalar/convert/into_scalar.rs +++ b/vortex-array/src/scalar/convert/into_scalar.rs @@ -9,6 +9,7 @@ use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativeDType; @@ -82,11 +83,9 @@ where Scalar: From, { fn from(vec: Vec) -> Self { - ScalarValue::Struct( - vec.into_iter() - .map(|elem| Scalar::from(elem).into_value()) - .collect(), - ) + let dtype = T::dtype(); + let scalars: Vec = vec.into_iter().map(Scalar::from).collect(); + ScalarValue::Array(build_array_from_scalars(&dtype, &scalars)) } } diff --git a/vortex-array/src/scalar/downcast.rs b/vortex-array/src/scalar/downcast.rs index 6b788d81ffc..323179197e9 100644 --- a/vortex-array/src/scalar/downcast.rs +++ b/vortex-array/src/scalar/downcast.rs @@ -8,6 +8,7 @@ use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::vortex_panic; +use crate::array::ArrayRef; use crate::scalar::BinaryScalar; use crate::scalar::BoolScalar; use crate::scalar::DecimalScalar; @@ -199,12 +200,20 @@ impl ScalarValue { } } - /// Returns the list elements as struct elements, panicking if the value is not a - /// [`List`](ScalarValue::List). + /// Returns the struct field values, panicking if the value is not a + /// [`Struct`](ScalarValue::Struct). pub fn as_struct_fields(&self) -> &[Option] { match self { ScalarValue::Struct(elements) => elements, - _ => vortex_panic!("ScalarValue is not a List"), + _ => vortex_panic!("ScalarValue is not a Struct"), + } + } + + /// Returns the array value, panicking if the value is not an [`Array`](ScalarValue::Array). + pub fn as_list(&self) -> &ArrayRef { + match self { + ScalarValue::Array(array) => array, + _ => vortex_panic!("ScalarValue is not an Array"), } } @@ -250,11 +259,20 @@ impl ScalarValue { } } - /// Returns the list elements, panicking if the value is not a [`List`](ScalarValue::List). + /// Returns the struct field values, panicking if the value is not a + /// [`Struct`](ScalarValue::Struct). pub fn into_struct_fields(self) -> Vec> { match self { ScalarValue::Struct(elements) => elements, - _ => vortex_panic!("ScalarValue is not a List"), + _ => vortex_panic!("ScalarValue is not a Struct"), + } + } + + /// Returns the array value, panicking if the value is not an [`Array`](ScalarValue::Array). + pub fn into_list(self) -> ArrayRef { + match self { + ScalarValue::Array(array) => array, + _ => vortex_panic!("ScalarValue is not an Array"), } } } diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index ba64c800156..4bd7dbdcd4a 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -25,6 +25,7 @@ use vortex_session::VortexSession; use crate::ArrayContext; use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::PType; @@ -113,12 +114,12 @@ impl From<&ScalarValue> for pb::ScalarValue { kind: Some(Kind::BytesValue(v.to_vec())), }, ScalarValue::Struct(v) => { - let mut values = Vec::with_capacity(v.len()); + let mut fields = Vec::with_capacity(v.len()); for elem in v.iter() { - values.push(ScalarValue::to_proto(elem.as_ref())); + fields.push(ScalarValue::to_proto(elem.as_ref())); } pb::ScalarValue { - kind: Some(Kind::StructValue(StructValue { values })), + kind: Some(Kind::StructValue(StructValue { fields })), } } ScalarValue::Array(array) => { @@ -283,6 +284,7 @@ impl ScalarValue { _ => dtype, }; + // TODO(connor): These helper functions should probably be called "from_proto_<>" instead. Ok(Some(match kind { Kind::NullValue(_) => return Ok(None), Kind::BoolValue(v) => bool_from_proto(*v, dtype)?, @@ -293,7 +295,7 @@ impl ScalarValue { Kind::F64Value(v) => f64_from_proto(*v, dtype)?, Kind::StringValue(s) => string_from_proto(s, dtype)?, Kind::BytesValue(b) => bytes_from_proto(b, dtype)?, - Kind::StructValue(v) => list_from_proto(v, dtype, session)?, + Kind::StructValue(v) => struct_from_proto(v, dtype, session)?, Kind::ArrayValue(a) => array_from_proto(a, dtype, session)?, })) } @@ -470,6 +472,70 @@ fn decimal_from_proto(bytes: &[u8], _decimal_dtype: &DecimalDType) -> VortexResu Ok(ScalarValue::Decimal(value)) } +/// Deserialize a [`ScalarValue`] from a protobuf [`StructValue`]. +/// +/// For [`DType::Struct`], this produces [`ScalarValue::Struct`] with the deserialized fields. +/// +/// For [`DType::List`] and [`DType::FixedSizeList`], this handles backwards compatibility: old +/// files may have stored list scalars as `StructValue` (formerly `ListValue`). +/// +/// We reconstruct an array from the individual scalar elements and return [`ScalarValue::Array`]. +fn struct_from_proto( + v: &StructValue, + dtype: &DType, + session: &VortexSession, +) -> VortexResult { + match dtype { + DType::Struct(struct_fields, _) => { + let mut values = Vec::with_capacity(v.fields.len()); + for (elem, field_dtype) in v.fields.iter().zip(struct_fields.fields()) { + values.push(ScalarValue::from_proto(elem, &field_dtype, session)?); + } + Ok(ScalarValue::Struct(values)) + } + // Handle backcompat list scalars. + DType::List(elem_dtype, _) => { + let scalars = v + .fields + .iter() + .map(|elem| { + let value = ScalarValue::from_proto(elem, elem_dtype.as_ref(), session)?; + Scalar::try_new(elem_dtype.as_ref().clone(), value) + }) + .collect::>>()?; + + Ok(ScalarValue::Array(build_array_from_scalars( + elem_dtype, &scalars, + ))) + } + // Handle backcompat fixed-size list scalars. + DType::FixedSizeList(elem_dtype, size, _) => { + vortex_ensure_eq!( + v.fields.len(), + *size as usize, + "FixedSizeList expected {size} elements, got {}", + v.fields.len() + ); + + let scalars = v + .fields + .iter() + .map(|elem| { + let value = ScalarValue::from_proto(elem, elem_dtype.as_ref(), session)?; + Scalar::try_new(elem_dtype.as_ref().clone(), value) + }) + .collect::>>()?; + + Ok(ScalarValue::Array(build_array_from_scalars( + elem_dtype, &scalars, + ))) + } + _ => vortex_bail!( + Serde: "expected Struct, List, or FixedSizeList dtype for StructValue, got {dtype}" + ), + } +} + /// The inner function for deserializing a [`ScalarValue::Array`] from a protobuf `ArrayValue`. fn array_from_proto_inner( array_value: &ArrayValue, @@ -516,28 +582,6 @@ fn array_from_proto( } } -/// Deserialize a [`ScalarValue::List`] from a protobuf `ListValue`. -fn list_from_proto( - v: &StructValue, - dtype: &DType, - session: &VortexSession, -) -> VortexResult { - let element_dtype = dtype - .as_list_element_opt() - .ok_or_else(|| vortex_err!(Serde: "expected List dtype for ListValue, got {dtype}"))?; - - let mut values = Vec::with_capacity(v.values.len()); - for elem in v.values.iter() { - values.push(ScalarValue::from_proto( - elem, - element_dtype.as_ref(), - session, - )?); - } - - Ok(ScalarValue::Struct(values)) -} - #[cfg(test)] mod tests { use std::sync::Arc; @@ -607,18 +651,61 @@ mod tests { #[test] fn test_list() { - round_trip(Scalar::new( - DType::List( - Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![ - Some(ScalarValue::Primitive(42i32.into())), - Some(ScalarValue::Primitive(43i32.into())), - ])), + let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); + round_trip(Scalar::list( + element_dtype, + vec![ + Scalar::primitive(42i32, Nullability::Nullable), + Scalar::primitive(43i32, Nullability::Nullable), + ], + Nullability::Nullable, )); } + /// Backwards compatibility: old files may have stored list scalars as `StructValue` (tag 9) + /// in the proto. Verify that deserializing such a payload produces `ScalarValue::Array`. + #[test] + fn test_backcompat_struct_value_with_list_dtype() { + let dtype = DType::List( + Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), + Nullability::Nullable, + ); + + // Build a StructValue proto as old writers would have produced. + let pb_struct = StructValue { + fields: vec![ + ScalarValue::to_proto(Some(&ScalarValue::Primitive(42i32.into()))), + ScalarValue::to_proto(Some(&ScalarValue::Primitive(43i32.into()))), + ], + }; + let pb_scalar_value = pb::ScalarValue { + kind: Some(Kind::StructValue(pb_struct)), + }; + + let deserialized = ScalarValue::from_proto(&pb_scalar_value, &dtype, &session()) + .unwrap() + .expect("should not be null"); + + // The result should be an Array, not a Struct. + assert!( + matches!(deserialized, ScalarValue::Array(_)), + "expected ScalarValue::Array for backcompat list deserialization, got {deserialized:?}" + ); + + // Verify the values are correct. + let scalar = Scalar::try_new(dtype, Some(deserialized)).unwrap(); + let list = scalar.as_list(); + assert_eq!(list.len(), 2); + assert_eq!( + list.element(0).unwrap().as_primitive().typed_value::(), + Some(42) + ); + assert_eq!( + list.element(1).unwrap().as_primitive().typed_value::(), + Some(43) + ); + } + #[test] fn test_f16() { round_trip(Scalar::primitive( diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index 26afd2d1e4b..f20b57c5261 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -12,14 +12,14 @@ use itertools::Itertools; use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_panic; use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::scalar::DecimalValue; use crate::scalar::PValue; +use crate::scalar::Scalar; /// The value stored in a [`Scalar`](crate::scalar::Scalar). /// @@ -71,10 +71,15 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::Struct(vec![]), + DType::List(edt, _) => Self::Array(build_array_from_scalars(edt, &[])), DType::FixedSizeList(edt, size, _) => { - let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect(); - Self::Struct(elements) + let elements: Vec = (0..*size) + .map(|_| { + Scalar::try_new(edt.as_ref().clone(), Some(Self::zero_value(edt))) + .vortex_expect("failed to create zero scalar") + }) + .collect(); + Self::Array(build_array_from_scalars(edt, &elements)) } DType::Struct(fields, _) => { let field_values = fields @@ -110,10 +115,15 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::Struct(vec![]), + DType::List(edt, _) => Self::Array(build_array_from_scalars(edt, &[])), DType::FixedSizeList(edt, size, _) => { - let elements = (0..*size).map(|_| Self::default_value(edt)).collect(); - Self::Struct(elements) + let elements: Vec = (0..*size) + .map(|_| { + Scalar::try_new(edt.as_ref().clone(), Self::default_value(edt)) + .vortex_expect("failed to create default scalar") + }) + .collect(); + Self::Array(build_array_from_scalars(edt, &elements)) } DType::Struct(fields, _) => { let field_values = fields.fields().map(|f| Self::default_value(&f)).collect(); @@ -127,24 +137,6 @@ impl ScalarValue { } }) } - - // TODO(connor): Is there a more performant way we can do this? - /// Returns a `List` representation of this value, expanding `Array` variants as needed. - /// - /// If the value is already a `List`, it is cloned as-is. If it is an `Array`, each element is - /// extracted via [`scalar_at`](crate::Array::scalar_at) and collected into a `List`. - fn as_list_expanded(&self) -> VortexResult { - match self { - Self::Struct(_) => Ok(self.clone()), - Self::Array(array) => { - let values = (0..array.len()) - .map(|i| array.scalar_at(i).map(|scalar| scalar.value().cloned())) - .collect::>>()?; - Ok(Self::Struct(values)) - } - _ => vortex_bail!("Expected List or Array ScalarValue"), - } - } } impl fmt::Display for ScalarValue { @@ -191,13 +183,18 @@ impl fmt::Display for ScalarValue { } write!(f, "]") } - ScalarValue::Array(_) => { - // We simply expand the value out into a list to display it. - let expanded = self.as_list_expanded().vortex_expect( - "something went wrong when expanding scalar value for displaying", - ); - - expanded.fmt(f) + ScalarValue::Array(array) => { + write!(f, "[")?; + for i in 0..array.len() { + if i > 0 { + write!(f, ", ")?; + } + match array.scalar_at(i) { + Ok(scalar) => write!(f, "{scalar}")?, + Err(_) => write!(f, "")?, + } + } + write!(f, "]") } } } @@ -212,24 +209,12 @@ impl PartialEq for ScalarValue { (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a == b, (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a == b, (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a == b, - // Two arrays: compare element-by-element without allocating intermediate Vecs. (ScalarValue::Array(a), ScalarValue::Array(b)) => { - // TODO(connor): Is there a better way to do this? + // Compare element-by-element. a.len() == b.len() - && a.dtype() == b.dtype() + && a.dtype().eq_ignore_nullability(b.dtype()) && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) } - // Cross-variant comparisons between `Array` and `List`: expand the `Array` side. - (ScalarValue::Array(_), ScalarValue::Struct(_)) - | (ScalarValue::Struct(_), ScalarValue::Array(_)) => { - let lhs = self.as_list_expanded().vortex_expect( - "something went wrong when expanding lhs scalar value for comparison", - ); - let rhs = other.as_list_expanded().vortex_expect( - "something went wrong when expanding rhs scalar value for comparison", - ); - lhs == rhs - } _ => false, } } @@ -246,17 +231,26 @@ impl PartialOrd for ScalarValue { (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a.partial_cmp(b), - // Any combination involving `Array`: expand both sides and delegate. - (ScalarValue::Array(_), ScalarValue::Struct(_)) - | (ScalarValue::Struct(_), ScalarValue::Array(_)) - | (ScalarValue::Array(_), ScalarValue::Array(_)) => { - let lhs = self.as_list_expanded().vortex_expect( - "something went wrong when expanding lhs scalar value for comparison", - ); - let rhs = other.as_list_expanded().vortex_expect( - "something went wrong when expanding rhs scalar value for comparison", - ); - lhs.partial_cmp(&rhs) + // Two arrays: lexicographic element-by-element comparison. + (ScalarValue::Array(a), ScalarValue::Array(b)) => { + let min_len = a.len().min(b.len()); + + // Compute the lexicographic ordering. + for i in 0..min_len { + let a_scalar = a.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + let b_scalar = b.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + + match a_scalar.partial_cmp(&b_scalar)? { + Ordering::Equal => continue, + ord => return Some(ord), + } + } + + a.len().partial_cmp(&b.len()) } _ => None, } @@ -272,10 +266,15 @@ impl Hash for ScalarValue { ScalarValue::Utf8(s) => s.hash(state), ScalarValue::Binary(b) => b.hash(state), ScalarValue::Struct(l) => l.hash(state), - ScalarValue::Array(_) => self - .as_list_expanded() - .vortex_expect("something went wrong when expanding scalar value for hashing") - .hash(state), + ScalarValue::Array(array) => { + array.len().hash(state); + for i in 0..array.len() { + let scalar = array.scalar_at(i).vortex_expect( + "something happened with `scalar_at` in `Hash` of `ScalarValue::Array`", + ); + scalar.hash(state); + } + } } } } diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 0317e7f6985..106ef6e5f62 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -201,22 +201,17 @@ mod tests { let f16_value1 = f16::from_f32(1.0); let f16_value2 = f16::from_f32(2.0); - let list_dtype = DType::List( - Arc::new(DType::Primitive(PType::F16, Nullability::NonNullable)), + let element_dtype = Arc::new(DType::Primitive(PType::F16, Nullability::NonNullable)); + + let scalar = Scalar::list( + element_dtype, + vec![ + Scalar::primitive(f16_value1, Nullability::NonNullable), + Scalar::primitive(f16_value2, Nullability::NonNullable), + ], Nullability::NonNullable, ); - let elements = vec![ - Some(ScalarValue::Primitive(PValue::U64( - f16_value1.to_bits() as u64 - ))), - Some(ScalarValue::Primitive(PValue::U64( - f16_value2.to_bits() as u64 - ))), - ]; - - let scalar = Scalar::new(list_dtype, Some(ScalarValue::Struct(elements))); - let list_scalar = scalar.as_list(); let elements = list_scalar.elements().unwrap(); diff --git a/vortex-array/src/scalar/tests/nested.rs b/vortex-array/src/scalar/tests/nested.rs index d4df124b868..a8260e2d47a 100644 --- a/vortex-array/src/scalar/tests/nested.rs +++ b/vortex-array/src/scalar/tests/nested.rs @@ -10,9 +10,7 @@ mod tests { use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; - use crate::scalar::PValue; use crate::scalar::Scalar; - use crate::scalar::ScalarValue; #[test] fn test_fixed_size_list_of_fixed_size_list() { @@ -306,15 +304,13 @@ mod tests { #[test] fn test_list_cast_element_types() { // Test casting list elements between different primitive types. - let list = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - Some(ScalarValue::Primitive(PValue::U16(100))), - ])), + let list = Scalar::list( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::primitive(100u16, Nullability::Nullable), + ], + Nullability::Nullable, ); // Cast U16 -> U32. @@ -345,16 +341,14 @@ mod tests { #[test] fn test_list_cast_element_overflow() { // Test that casting U16 values too large for U8 fails. - let list_with_large_values = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![ - Some(ScalarValue::Primitive(PValue::U16(100))), - Some(ScalarValue::Primitive(PValue::U16(256))), // Too large for U8 - Some(ScalarValue::Primitive(PValue::U16(1000))), // Too large for U8 - ])), + let list_with_large_values = Scalar::list( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(100u16, Nullability::Nullable), + Scalar::primitive(256u16, Nullability::Nullable), // Too large for U8. + Scalar::primitive(1000u16, Nullability::Nullable), // Too large for U8. + ], + Nullability::Nullable, ); let target_u8 = DType::List( diff --git a/vortex-array/src/scalar/tests/nullability.rs b/vortex-array/src/scalar/tests/nullability.rs index cb26bc94a19..830a6c3040c 100644 --- a/vortex-array/src/scalar/tests/nullability.rs +++ b/vortex-array/src/scalar/tests/nullability.rs @@ -15,9 +15,7 @@ mod tests { use crate::extension::datetime::Date; use crate::extension::datetime::TimeUnit; use crate::extension::datetime::Timestamp; - use crate::scalar::PValue; use crate::scalar::Scalar; - use crate::scalar::ScalarValue; #[rstest] fn null_can_cast_to_anything_nullable( @@ -49,14 +47,10 @@ mod tests { #[test] fn test_list_cast_nullability_changes() { - let list = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![Some(ScalarValue::Primitive( - PValue::U16(6), - ))])), + let list = Scalar::list( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![Scalar::primitive(6u16, Nullability::Nullable)], + Nullability::Nullable, ); // Change element nullability from Nullable to NonNullable. @@ -92,19 +86,17 @@ mod tests { #[test] fn test_list_cast_with_null_elements() { - let list_with_null = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - None, - Some(ScalarValue::Primitive(PValue::U16(10))), - ])), + let list_with_null = Scalar::list( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::null(DType::Primitive(PType::U16, Nullability::Nullable)), + Scalar::primitive(10u16, Nullability::Nullable), + ], + Nullability::Nullable, ); - // Cast to different element type with nullable elements - should succeed. + // Cast to different element type with nullable elements — should succeed. let target_u8_nullable = DType::List( Arc::from(DType::Primitive(PType::U8, Nullability::Nullable)), Nullability::Nullable, @@ -195,15 +187,13 @@ mod tests { #[test] fn test_list_cast_null_to_nonnull_error() { - let list_with_null = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::Struct(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - None, - ])), + let list_with_null = Scalar::list( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::null(DType::Primitive(PType::U16, Nullability::Nullable)), + ], + Nullability::Nullable, ); // Casting to non-nullable element type should fail. diff --git a/vortex-array/src/scalar/typed_view/list.rs b/vortex-array/src/scalar/typed_view/list.rs index 52e14581ec6..8e8810ca3de 100644 --- a/vortex-array/src/scalar/typed_view/list.rs +++ b/vortex-array/src/scalar/typed_view/list.rs @@ -3,9 +3,11 @@ //! [`ListScalar`] typed view implementation. +use std::cmp::Ordering; use std::fmt::Display; use std::fmt::Formatter; use std::hash::Hash; +use std::hash::Hasher; use std::sync::Arc; use itertools::Itertools; @@ -16,6 +18,7 @@ use vortex_error::vortex_err; use vortex_error::vortex_panic; use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -29,6 +32,8 @@ use crate::scalar::ScalarValue; /// more elements of the same type, or be null. If the `dtype` is a [`FixedSizeList`], then the /// number of `elements` is equal to the `size` field of the [`FixedSizeList`]. /// +/// The underlying data is always stored as a [`ScalarValue::Array`]. +/// /// [`FixedSizeList`]: DType::FixedSizeList #[derive(Debug, Clone)] pub struct ListScalar<'a> { @@ -39,8 +44,8 @@ pub struct ListScalar<'a> { /// every time we want to access this. element_dtype: &'a Arc, - /// The elements of this list scalar, or `None` if the scalar is null. - elements: Option>, + /// The underlying array, or `None` if the scalar is null. + array: Option<&'a ArrayRef>, } impl<'a> ListScalar<'a> { @@ -57,11 +62,7 @@ impl<'a> ListScalar<'a> { Ok(Self { dtype, element_dtype, - elements: value.map(|v| match v { - ScalarValue::Struct(elems) => Elements::Expanded(elems), - ScalarValue::Array(array) => Elements::Array(array), - _ => vortex_panic!("Expected List or Array ScalarValue for list dtype"), - }), + array: value.map(|v| v.as_list()), }) } @@ -76,30 +77,27 @@ impl<'a> ListScalar<'a> { /// Returns 0 if the list is null. #[inline] pub fn len(&self) -> usize { - self.elements.as_ref().map(|e| e.len()).unwrap_or(0) + self.array.map(|a| a.len()).unwrap_or(0) } /// Returns true if the list has no elements or is null. #[inline] pub fn is_empty(&self) -> bool { - self.elements.as_ref().map(|e| e.is_empty()).unwrap_or(true) + self.array.map(|a| a.is_empty()).unwrap_or(true) } /// Returns true if the list is null. #[inline] pub fn is_null(&self) -> bool { - self.elements.is_none() + self.array.is_none() } - /// Returns the underlying [`ArrayRef`] if this scalar is backed by an array. + /// Returns the underlying [`ArrayRef`]. /// /// This is useful for bulk operations (e.g., in builders) that can avoid expanding the array /// into individual scalars. pub fn as_array(&self) -> Option<&'a ArrayRef> { - self.elements.and_then(|e| match e { - Elements::Expanded(_) => None, - Elements::Array(array) => Some(array), - }) + self.array } /// Returns the data type of the list's elements. @@ -113,36 +111,39 @@ impl<'a> ListScalar<'a> { /// Returns the element at the given index as a scalar. /// /// Returns `None` if the list is null or the index is out of bounds. - /// - /// For `Array`-backed scalars, this calls [`Array::scalar_at()`](crate::Array::scalar_at) on - /// the underlying array which may involve per-element work. For `Expanded` list scalars, this - /// is a scalar clone. pub fn element(&self, idx: usize) -> Option { - self.elements.and_then(|l| l.get_owned(idx)).map(|value| { - // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), value) } - }) + let array = self.array?; + if idx >= array.len() { + return None; + } + + Some( + array + .scalar_at(idx) + .vortex_expect("index already bounds-checked"), + ) } /// Returns all elements in the list as a vector of scalars. /// /// Returns `None` if the list is null. /// - /// This always materializes every element into a `Vec`. For `Array`-backed scalars this - /// requires calling [`Array::scalar_at()`](crate::Array::scalar_at) for each element. + /// This always materializes every element into a `Vec` by calling `Array::scalar_at()` for each + /// element. /// /// Prefer [`as_array()`](Self::as_array) when the underlying array can be used directly (e.g., /// in builders). pub fn elements(&self) -> Option> { - self.elements.map(|elems| { - elems - .iter() - .map(|e| { - // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), e) } + let array = self.array?; + Some( + (0..array.len()) + .map(|i| { + array + .scalar_at(i) + .vortex_expect("index already bounds-checked") }) - .collect_vec() - }) + .collect_vec(), + ) } /// Casts the list to the target [`DType`]. @@ -175,44 +176,38 @@ impl<'a> ListScalar<'a> { ) } - Scalar::try_new( - dtype.clone(), - Some(ScalarValue::Struct( - self.elements - .ok_or_else(|| vortex_err!("nullness should be handled in Scalar::cast"))? - .iter() - .map(|element| { - // Recursively cast the elements of the list. - Scalar::try_new(DType::clone(self.element_dtype), element)? - .cast(target_element_dtype) - .map(|x| x.into_value()) - }) - .collect::>>>()?, - )), - ) + let array = self + .array + .ok_or_else(|| vortex_err!("nullness should be handled in Scalar::cast"))?; + + // Cast each element and build a new array with the target element dtype. + let casted_elements = (0..array.len()) + .map(|i| { + array + .scalar_at(i) + .vortex_expect("index already bounds-checked") + .cast(target_element_dtype) + }) + .collect::>>()?; + + let casted_array = build_array_from_scalars(target_element_dtype, &casted_elements); + + Scalar::try_new(dtype.clone(), Some(ScalarValue::Array(casted_array))) } } impl Display for ListScalar<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.elements { + match self.elements() { None => write!(f, "null"), - Some(elems) => { + Some(scalars) => { let type_str: &dyn Display = if let DType::FixedSizeList(_, size, _) = self.dtype { &format!("fixed_size<{size}>") } else { &"" }; - write!( - f, - "{type_str}[{}]", - elems - .iter() - .map(|e| Scalar::try_new(self.element_dtype().clone(), e) - .vortex_expect("`ListScalar` is already a valid `Scalar`")) - .format(", ") - ) + write!(f, "{type_str}[{}]", scalars.iter().format(", ")) } } } @@ -224,11 +219,12 @@ impl PartialEq for ListScalar<'_> { return false; } - match (self.elements, other.elements) { + match (self.array, other.array) { (None, None) => true, (Some(a), Some(b)) => { - // TODO(connor): Is this the best we can do? - a.len() == b.len() && (0..a.len()).all(|i| a.get_owned(i) == b.get_owned(i)) + // Compare element-by-element. + a.len() == b.len() + && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) } _ => false, } @@ -239,7 +235,7 @@ impl Eq for ListScalar<'_> {} /// Ord is not implemented since it's undefined for different element DTypes. impl PartialOrd for ListScalar<'_> { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { if !self .element_dtype .eq_ignore_nullability(other.element_dtype) @@ -247,99 +243,50 @@ impl PartialOrd for ListScalar<'_> { return None; } - self.elements().partial_cmp(&other.elements()) - } -} - -impl Hash for ListScalar<'_> { - fn hash(&self, state: &mut H) { - self.dtype.hash(state); - self.len().hash(state); + match (self.array, other.array) { + (None, None) => Some(Ordering::Equal), + (None, Some(_)) => Some(Ordering::Less), + (Some(_), None) => Some(Ordering::Greater), + (Some(a), Some(b)) => { + let min_len = a.len().min(b.len()); + + // Compute the lexicographic ordering. + for i in 0..min_len { + let a_scalar = a.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + let b_scalar = b.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + + match a_scalar.partial_cmp(&b_scalar)? { + Ordering::Equal => continue, + ord => return Some(ord), + } + } - if let Some(elems) = self.elements { - for i in 0..elems.len() { - elems.get_owned(i).hash(state); + a.len().partial_cmp(&b.len()) } } } } -/// The elements of the list. -#[derive(Debug, Clone, Copy)] -enum Elements<'a> { - /// Each element is `Option` where `None` represents a null element within the - /// list. This used to be the only representation of lists (before we added an `Array` variant). - Expanded(&'a [Option]), - /// A list represented by an [`ArrayRef`]. - Array(&'a ArrayRef), -} - -impl<'a> Elements<'a> { - /// Returns the number of elements in the list. - fn len(&self) -> usize { - match self { - Elements::Expanded(elems) => elems.len(), - Elements::Array(array) => array.len(), - } - } - - /// Returns true if the list has no elements. - fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns an iterator over the elements of the list. - fn iter(self) -> OwnedElementsIter<'a> { - OwnedElementsIter { - elements: self, - index: 0, - } - } +impl Hash for ListScalar<'_> { + fn hash(&self, state: &mut H) { + self.dtype.hash(state); + self.len().hash(state); - /// Returns the element at the given index as an `Option`. - /// - /// Returns `None` if the index is out of bounds. Returns `Some(None)` if the element is null. - /// Returns `Some(Some(ScalarValue))` if the element is non-null. - fn get_owned(&self, idx: usize) -> Option> { - match self { - Elements::Expanded(elems) => elems.get(idx).cloned(), - Elements::Array(array) => { - if idx >= array.len() { - None - } else { - let scalar = array - .scalar_at(idx) - .vortex_expect("index already bounds-checked"); - Some(scalar.into_value()) - } + if let Some(array) = self.array { + for i in 0..array.len() { + let scalar = array + .scalar_at(i) + .vortex_expect("something happened with `scalar_at` in `Hash` of `ListScalar`"); + scalar.hash(state); } } } } -/// An iterator over the elements of a list, yielding `Option` for each element. -struct OwnedElementsIter<'a> { - elements: Elements<'a>, - index: usize, -} - -impl Iterator for OwnedElementsIter<'_> { - type Item = Option; - - fn next(&mut self) -> Option { - let item = self.elements.get_owned(self.index)?; - self.index += 1; - Some(item) - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.elements.len() - self.index; - (remaining, Some(remaining)) - } -} - -impl ExactSizeIterator for OwnedElementsIter<'_> {} - #[cfg(test)] mod tests { use std::sync::Arc; @@ -398,7 +345,7 @@ mod tests { let list = list_scalar.as_list(); - // Test element access + // Test element access. let elem0 = list.element(0).unwrap(); assert_eq!(elem0.as_primitive().typed_value::().unwrap(), 10); @@ -408,7 +355,7 @@ mod tests { let elem2 = list.element(2).unwrap(); assert_eq!(elem2.as_primitive().typed_value::().unwrap(), 30); - // Test out of bounds + // Test out of bounds. assert!(list.element(3).is_none()); } @@ -585,7 +532,7 @@ mod tests { let list = list_scalar.as_list(); - // Cast to list with i64 elements + // Cast to list with i64 elements. let target_dtype = DType::List( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), Nullability::NonNullable, diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 2ef80a71719..21a06a8de57 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -74,46 +74,32 @@ impl Scalar { ); } DType::List(elem_dtype, _) => { - if let ScalarValue::Array(array) = value { - // If the scalar value is an array, we just need to check that the array - // elements match the elem_dtype, so just check that the dtype is the same. - vortex_ensure_eq!(elem_dtype.as_ref(), array.dtype()); - return Ok(()); - } - - let ScalarValue::Struct(elements) = value else { - vortex_bail!("list dtype expected List value, got {value}"); + let ScalarValue::Array(array) = value else { + vortex_bail!("list dtype expected Array value, got {value}"); }; - - for (i, element) in elements.iter().enumerate() { - Self::validate(elem_dtype.as_ref(), element.as_ref()) - .map_err(|e| vortex_error::vortex_err!("list element at index {i}: {e}"))?; - } + vortex_ensure_eq!( + elem_dtype.as_ref(), + array.dtype(), + "list element dtype {elem_dtype} is not compatible with array dtype {}", + array.dtype() + ); } DType::FixedSizeList(elem_dtype, size, _) => { - if let ScalarValue::Array(array) = value { - // If the scalar value is an array, we just need to check that the array - // elements match the elem_dtype, so just check that the dtype is the same. - vortex_ensure_eq!(elem_dtype.as_ref(), array.dtype()); - return Ok(()); - } - - let ScalarValue::Struct(elements) = value else { - vortex_bail!("fixed-size list dtype expected List value, got {value}",); + let ScalarValue::Array(array) = value else { + vortex_bail!("fixed-size list dtype expected Array value, got {value}"); }; - - let len = elements.len(); vortex_ensure_eq!( - len, + elem_dtype.as_ref(), + array.dtype(), + "FSL element dtype {elem_dtype} is not compatible with array dtype {}", + array.dtype() + ); + vortex_ensure_eq!( + array.len(), *size as usize, - "fixed-size list dtype expected {size} elements, got {len}", + "fixed-size list expected {size} elements, got {}", + array.len() ); - - for (i, element) in elements.iter().enumerate() { - Self::validate(elem_dtype.as_ref(), element.as_ref()).map_err(|e| { - vortex_error::vortex_err!("fixed-size list element at index {i}: {e}",) - })?; - } } DType::Struct(fields, _) => { let ScalarValue::Struct(values) = value else { diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index be1dfde2dcb..a8dbc9fa187 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -1098,12 +1098,12 @@ pub vortex_proto::scalar::scalar_value::Kind::F64Value(f64) pub vortex_proto::scalar::scalar_value::Kind::Int64Value(i64) -pub vortex_proto::scalar::scalar_value::Kind::ListValue(vortex_proto::scalar::ListValue) - pub vortex_proto::scalar::scalar_value::Kind::NullValue(i32) pub vortex_proto::scalar::scalar_value::Kind::StringValue(alloc::string::String) +pub vortex_proto::scalar::scalar_value::Kind::StructValue(vortex_proto::scalar::StructValue) + pub vortex_proto::scalar::scalar_value::Kind::Uint64Value(u64) impl vortex_proto::scalar::scalar_value::Kind @@ -1166,34 +1166,6 @@ pub fn vortex_proto::scalar::ArrayValue::clear(&mut self) pub fn vortex_proto::scalar::ArrayValue::encoded_len(&self) -> usize -pub struct vortex_proto::scalar::ListValue - -pub vortex_proto::scalar::ListValue::values: alloc::vec::Vec - -impl core::clone::Clone for vortex_proto::scalar::ListValue - -pub fn vortex_proto::scalar::ListValue::clone(&self) -> vortex_proto::scalar::ListValue - -impl core::cmp::PartialEq for vortex_proto::scalar::ListValue - -pub fn vortex_proto::scalar::ListValue::eq(&self, other: &vortex_proto::scalar::ListValue) -> bool - -impl core::default::Default for vortex_proto::scalar::ListValue - -pub fn vortex_proto::scalar::ListValue::default() -> Self - -impl core::fmt::Debug for vortex_proto::scalar::ListValue - -pub fn vortex_proto::scalar::ListValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl core::marker::StructuralPartialEq for vortex_proto::scalar::ListValue - -impl prost::message::Message for vortex_proto::scalar::ListValue - -pub fn vortex_proto::scalar::ListValue::clear(&mut self) - -pub fn vortex_proto::scalar::ListValue::encoded_len(&self) -> usize - pub struct vortex_proto::scalar::Scalar pub vortex_proto::scalar::Scalar::dtype: core::option::Option @@ -1251,3 +1223,31 @@ impl prost::message::Message for vortex_proto::scalar::ScalarValue pub fn vortex_proto::scalar::ScalarValue::clear(&mut self) pub fn vortex_proto::scalar::ScalarValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::StructValue + +pub vortex_proto::scalar::StructValue::fields: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::clone(&self) -> vortex_proto::scalar::StructValue + +impl core::cmp::PartialEq for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::eq(&self, other: &vortex_proto::scalar::StructValue) -> bool + +impl core::default::Default for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::StructValue + +impl prost::message::Message for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::clear(&mut self) + +pub fn vortex_proto::scalar::StructValue::encoded_len(&self) -> usize From 7d5740bb60d402df6c06d81ba393758c2f4bb33e Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 4 Mar 2026 12:09:54 -0500 Subject: [PATCH 4/5] rename old list scalar constructors Signed-off-by: Connor Tsui --- .../sequence/src/compute/list_contains.rs | 2 +- encodings/sparse/src/canonical.rs | 8 +- fuzz/src/array/scalar_at.rs | 8 +- vortex-array/public-api.lock | 6 +- vortex-array/src/arrays/arbitrary.rs | 12 +- .../src/arrays/constant/vtable/canonical.rs | 18 +-- .../src/arrays/fixed_size_list/tests/basic.rs | 4 +- .../fixed_size_list/tests/degenerate.rs | 20 ++- .../arrays/fixed_size_list/tests/nested.rs | 18 +-- .../fixed_size_list/tests/nullability.rs | 14 +- .../src/arrays/fixed_size_list/tests/take.rs | 2 +- vortex-array/src/arrays/list/compute/take.rs | 12 +- vortex-array/src/arrays/list/test_harness.rs | 4 +- vortex-array/src/arrays/list/tests.rs | 16 +-- .../src/arrays/listview/tests/basic.rs | 2 +- .../src/arrays/listview/tests/nullability.rs | 4 +- vortex-array/src/arrow/record_batch.rs | 2 +- vortex-array/src/builders/fixed_size_list.rs | 57 +++++--- vortex-array/src/builders/list.rs | 21 +-- vortex-array/src/builders/listview.rs | 32 +++-- vortex-array/src/builders/tests.rs | 4 +- vortex-array/src/scalar/arrow.rs | 2 +- vortex-array/src/scalar/constructor.rs | 18 +-- vortex-array/src/scalar/proto.rs | 2 +- vortex-array/src/scalar/tests/casting.rs | 2 +- vortex-array/src/scalar/tests/nested.rs | 133 +++++++++--------- vortex-array/src/scalar/tests/nullability.rs | 46 +++--- vortex-array/src/scalar/tests/primitives.rs | 4 +- vortex-array/src/scalar/tests/round_trip.rs | 10 +- vortex-array/src/scalar/typed_view/list.rs | 56 +++++--- .../src/scalar_fn/fns/binary/compare.rs | 2 +- .../src/scalar_fn/fns/list_contains/mod.rs | 8 +- vortex-datafusion/src/convert/exprs.rs | 2 +- vortex-duckdb/src/convert/expr.rs | 2 +- vortex-duckdb/src/convert/scalar.rs | 2 +- vortex-duckdb/src/convert/table_filter.rs | 3 +- vortex-python/src/scalar/factory.rs | 4 +- 37 files changed, 323 insertions(+), 239 deletions(-) diff --git a/encodings/sequence/src/compute/list_contains.rs b/encodings/sequence/src/compute/list_contains.rs index bd4d29aa04b..06daf1e4dbb 100644 --- a/encodings/sequence/src/compute/list_contains.rs +++ b/encodings/sequence/src/compute/list_contains.rs @@ -63,7 +63,7 @@ mod tests { #[test] fn test_list_contains_seq() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 3.into()], Nullability::Nullable, diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index 7711719c3b0..2a36c80b256 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -1241,7 +1241,7 @@ mod test { .into_array(); let indices = buffer![0u8, 2u8, 4u8].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(99i32, NonNullable), @@ -1279,7 +1279,7 @@ mod test { .into_array(); let indices = buffer![1u16, 3u16, 4u16].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(7i32, NonNullable), @@ -1326,7 +1326,7 @@ mod test { let indices = buffer![5u32, 50, 95].into_array(); // Fill value [99, 99] will appear 97 times but stored only once. - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(99i32, NonNullable), @@ -1384,7 +1384,7 @@ mod test { .into_array(); let indices = buffer![0u32].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(1i32, NonNullable), diff --git a/fuzz/src/array/scalar_at.rs b/fuzz/src/array/scalar_at.rs index aa806d56596..5d3704e35b5 100644 --- a/fuzz/src/array/scalar_at.rs +++ b/fuzz/src/array/scalar_at.rs @@ -54,7 +54,7 @@ pub fn scalar_at_canonical_array(canonical: Canonical, index: usize) -> VortexRe .vortex_expect("scalar_at_canonical_array should succeed in fuzz test") }) .collect(); - Scalar::list( + Scalar::list_from_scalars( Arc::new(list.dtype().clone()), children, array.dtype().nullability(), @@ -72,7 +72,11 @@ pub fn scalar_at_canonical_array(canonical: Canonical, index: usize) -> VortexRe .vortex_expect("scalar_at_canonical_array should succeed in fuzz test") }) .collect(); - Scalar::fixed_size_list(list.dtype().clone(), children, array.dtype().nullability()) + Scalar::fixed_size_list_from_scalars( + list.dtype().clone(), + children, + array.dtype().nullability(), + ) } Canonical::Struct(array) => { let field_scalars: Vec = array diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6cb4075f8e5..4e204f74fbd 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -12234,14 +12234,14 @@ pub fn vortex_array::scalar::Scalar::extension Self -pub fn vortex_array::scalar::Scalar::fixed_size_list(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self - -pub fn vortex_array::scalar::Scalar::list(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::fixed_size_list_from_scalars(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::scalar::Scalar::list_array(parent_dtype: vortex_array::dtype::DType, elem_list: vortex_array::ArrayRef) -> Self pub fn vortex_array::scalar::Scalar::list_empty(element_dtype: alloc::sync::Arc, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::list_from_scalars(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self + pub fn vortex_array::scalar::Scalar::primitive>(value: T, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::scalar::Scalar::primitive_value(value: vortex_array::scalar::PValue, ptype: vortex_array::dtype::PType, nullability: vortex_array::dtype::Nullability) -> Self diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index 711f0c0a65f..dab78a9d58e 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -26,6 +26,7 @@ use crate::builders::ArrayBuilder; use crate::builders::DecimalBuilder; use crate::builders::FixedSizeListBuilder; use crate::builders::ListViewBuilder; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::dtype::NativePType; @@ -33,6 +34,7 @@ use crate::dtype::Nullability; use crate::dtype::PType; use crate::match_each_decimal_value_type; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::scalar::arbitrary::random_scalar; use crate::validity::Validity; @@ -247,12 +249,18 @@ fn random_list_scalar( u: &mut Unstructured, elem_dtype: &Arc, list_size: u32, - null: Nullability, + nullability: Nullability, ) -> Result { let elems = (0..list_size) .map(|_| random_scalar(u, elem_dtype)) .collect::>>()?; - Ok(Scalar::list(elem_dtype.clone(), elems, null)) + let array = build_array_from_scalars(elem_dtype, &elems); + + Scalar::try_new( + DType::List(elem_dtype.clone(), nullability), + Some(ScalarValue::Array(array)), + ) + .map_err(|_| arbitrary::Error::IncorrectFormat) } fn random_string( diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index af8c5ee8e4f..c6e0aa304ed 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -389,7 +389,7 @@ mod tests { #[test] fn test_canonicalize_lists() -> VortexResult<()> { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable)), vec![1u64.into(), 2u64.into()], Nullability::NonNullable, @@ -414,7 +414,7 @@ mod tests { #[test] fn test_canonicalize_empty_list() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -481,7 +481,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_non_null() { // Test with a non-null fixed-size list constant. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -509,7 +509,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_nullable() { // Test with a nullable but non-null fixed-size list constant. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::F64, Nullability::NonNullable)), vec![ Scalar::primitive(1.5f64, Nullability::NonNullable), @@ -558,7 +558,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_empty() { // Test with size-0 lists (edge case). - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I8, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -578,7 +578,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_nested() { // Test with nested data types (list of strings). - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![Scalar::from("hello"), Scalar::from("world")], Nullability::NonNullable, @@ -601,7 +601,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_single_element() { // Test with a single-element list. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I16, Nullability::NonNullable)), vec![Scalar::primitive(42i16, Nullability::NonNullable)], Nullability::NonNullable, @@ -620,7 +620,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_with_null_elements() { // Test FSL with nullable element type where some elements are null. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(100i32, Nullability::Nullable), @@ -661,7 +661,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_large() { // Test with a large constant array. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)), vec![ Scalar::primitive(1u8, Nullability::NonNullable), diff --git a/vortex-array/src/arrays/fixed_size_list/tests/basic.rs b/vortex-array/src/arrays/fixed_size_list/tests/basic.rs index e3cdc9b2789..1cf75e2de44 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/basic.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/basic.rs @@ -68,7 +68,7 @@ fn test_scalar_at() { let first = fsl.scalar_at(0).unwrap(); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, @@ -85,7 +85,7 @@ fn test_scalar_at() { let second = fsl.scalar_at(1).unwrap(); assert_eq!( second, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![4i32.into(), 5i32.into(), 6i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs b/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs index d1b08fb0d04..8e34fdb9c64 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs @@ -58,7 +58,7 @@ fn test_fsl_size_0_length_1_non_nullable() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I16.into()), vec![], Nullability::NonNullable, @@ -84,7 +84,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_first.is_null()); assert_eq!( scalar_first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -95,7 +95,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_middle.is_null()); assert_eq!( scalar_middle, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -106,7 +106,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_end.is_null()); assert_eq!( scalar_end, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -156,7 +156,11 @@ fn test_fsl_size_0_length_1_nullable_valid() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list(Arc::new(PType::U16.into()), vec![], Nullability::Nullable,) + Scalar::fixed_size_list_from_scalars( + Arc::new(PType::U16.into()), + vec![], + Nullability::Nullable, + ) ); } @@ -205,7 +209,11 @@ fn test_fsl_size_0_length_10_nullable_mixed() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list(Arc::new(PType::F32.into()), vec![], Nullability::Nullable,) + Scalar::fixed_size_list_from_scalars( + Arc::new(PType::F32.into()), + vec![], + Nullability::Nullable, + ) ); } else { assert!(scalar.is_null()); diff --git a/vortex-array/src/arrays/fixed_size_list/tests/nested.rs b/vortex-array/src/arrays/fixed_size_list/tests/nested.rs index f49cdd7f2b0..c7377159788 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/nested.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/nested.rs @@ -282,21 +282,21 @@ fn test_fsl_of_list() { // Add 6 lists (2 FSL * 3 lists each). // First FSL: [[1,2], [3], [4,5,6]]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![3i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![4i32.into(), 5i32.into(), 6i32.into()], Nullability::NonNullable, @@ -305,21 +305,21 @@ fn test_fsl_of_list() { // Second FSL: [[7], [8,9], []]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![7i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![8i32.into(), 9i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -363,7 +363,7 @@ fn test_fsl_of_nullable_list() { // Add 4 lists (2 FSL * 2 lists each). // First FSL: [[1,2], null]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![1u16.into(), 2u16.into()], Nullability::Nullable, @@ -373,14 +373,14 @@ fn test_fsl_of_nullable_list() { // Second FSL: [[3], [4,5]]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![3u16.into()], Nullability::Nullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![4u16.into(), 5u16.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs b/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs index 4cba013426c..d19149598b7 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs @@ -33,7 +33,7 @@ fn test_nullable_fsl_with_nulls() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![1i32.into(), 2i32.into()], Nullability::Nullable, @@ -54,7 +54,7 @@ fn test_nullable_fsl_with_nulls() { assert!(!third.is_null()); assert_eq!( third, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![5i32.into(), 6i32.into()], Nullability::Nullable, @@ -95,7 +95,7 @@ fn test_nullable_elements_non_nullable_lists() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![Some(1i32).into(), None::.into(), Some(3i32).into(),], Nullability::NonNullable, @@ -107,7 +107,7 @@ fn test_nullable_elements_non_nullable_lists() { assert!(!second.is_null()); assert_eq!( second, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![Some(4i32).into(), Some(5i32).into(), None::.into(),], Nullability::NonNullable, @@ -133,7 +133,7 @@ fn test_nullable_elements_and_nullable_lists() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![Some(10u16).into(), None::.into()], Nullability::Nullable, @@ -154,7 +154,7 @@ fn test_nullable_elements_and_nullable_lists() { assert!(!third.is_null()); assert_eq!( third, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![None::.into(), None::.into()], Nullability::Nullable, @@ -187,7 +187,7 @@ fn test_alternating_nulls() { let expected_value = u8::try_from(i + 1).unwrap(); assert_eq!( scalar, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::U8.into()), vec![expected_value.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/take.rs b/vortex-array/src/arrays/fixed_size_list/tests/take.rs index 68ff369fe52..d04daa9d42c 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/take.rs @@ -167,7 +167,7 @@ fn test_take_nullable_arrays_fsl_specific( let scalars: Vec = list.into_iter().map(|v| v.into()).collect(); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( DType::Primitive(PType::I32, Nullability::NonNullable), scalars, Nullability::NonNullable, diff --git a/vortex-array/src/arrays/list/compute/take.rs b/vortex-array/src/arrays/list/compute/take.rs index 1d299516b59..2021bedb414 100644 --- a/vortex-array/src/arrays/list/compute/take.rs +++ b/vortex-array/src/arrays/list/compute/take.rs @@ -231,7 +231,7 @@ mod test { assert!(result.is_valid(0).unwrap()); assert_eq!( result.scalar_at(0).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![0i32.into(), 5.into()], Nullability::Nullable @@ -243,7 +243,7 @@ mod test { assert!(result.is_valid(2).unwrap()); assert_eq!( result.scalar_at(2).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![3i32.into()], Nullability::Nullable @@ -253,7 +253,7 @@ mod test { assert!(result.is_valid(3).unwrap()); assert_eq!( result.scalar_at(3).unwrap(), - Scalar::list(element_dtype, vec![], Nullability::Nullable) + Scalar::list_from_scalars(element_dtype, vec![], Nullability::Nullable) ); } @@ -311,7 +311,7 @@ mod test { assert!(result.is_valid(0).unwrap()); assert_eq!( result.scalar_at(0).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![3i32.into()], Nullability::NonNullable @@ -321,7 +321,7 @@ mod test { assert!(result.is_valid(1).unwrap()); assert_eq!( result.scalar_at(1).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![0i32.into(), 5.into()], Nullability::NonNullable @@ -331,7 +331,7 @@ mod test { assert!(result.is_valid(2).unwrap()); assert_eq!( result.scalar_at(2).unwrap(), - Scalar::list(element_dtype, vec![], Nullability::NonNullable) + Scalar::list_from_scalars(element_dtype, vec![], Nullability::NonNullable) ); } diff --git a/vortex-array/src/arrays/list/test_harness.rs b/vortex-array/src/arrays/list/test_harness.rs index 7ced1eb878f..542d2477a99 100644 --- a/vortex-array/src/arrays/list/test_harness.rs +++ b/vortex-array/src/arrays/list/test_harness.rs @@ -35,7 +35,7 @@ impl ListArray { ); for v in iter { - let elem = Scalar::list( + let elem = Scalar::list_from_scalars( dtype.clone(), v.into_iter().map(|x| x.into()).collect_vec(), dtype.nullability(), @@ -63,7 +63,7 @@ impl ListArray { for v in iter { if let Some(v) = v { - let elem = Scalar::list( + let elem = Scalar::list_from_scalars( dtype.clone(), v.into_iter().map(|x| x.into()).collect_vec(), dtype.nullability(), diff --git a/vortex-array/src/arrays/list/tests.rs b/vortex-array/src/arrays/list/tests.rs index 9d63372ba6b..8ee1395a17a 100644 --- a/vortex-array/src/arrays/list/tests.rs +++ b/vortex-array/src/arrays/list/tests.rs @@ -46,7 +46,7 @@ fn test_simple_list_array() { let list = ListArray::try_new(elements, offsets, validity).unwrap(); assert_eq!( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 2.into()], Nullability::Nullable @@ -54,7 +54,7 @@ fn test_simple_list_array() { list.scalar_at(0).unwrap() ); assert_eq!( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![3.into(), 4.into()], Nullability::Nullable @@ -62,7 +62,7 @@ fn test_simple_list_array() { list.scalar_at(1).unwrap() ); assert_eq!( - Scalar::list(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable), + Scalar::list_from_scalars(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable), list.scalar_at(2).unwrap() ); } @@ -375,7 +375,7 @@ fn test_offset_to_0() { ListBuilder::::with_capacity(Arc::new(I32.into()), Nullability::NonNullable, 10, 5); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -385,7 +385,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![4.into(), 5.into(), 6.into()], Nullability::NonNullable, @@ -395,7 +395,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![7.into(), 8.into(), 9.into()], Nullability::NonNullable, @@ -405,7 +405,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![10.into(), 11.into(), 12.into()], Nullability::NonNullable, @@ -415,7 +415,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![13.into(), 14.into(), 15.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/listview/tests/basic.rs b/vortex-array/src/arrays/listview/tests/basic.rs index 09dce5797bf..385476ba247 100644 --- a/vortex-array/src/arrays/listview/tests/basic.rs +++ b/vortex-array/src/arrays/listview/tests/basic.rs @@ -57,7 +57,7 @@ fn test_basic_listview_comprehensive() { let first_scalar = listview.scalar_at(0).unwrap(); assert_eq!( first_scalar, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/listview/tests/nullability.rs b/vortex-array/src/arrays/listview/tests/nullability.rs index 30b2a192ef1..464cbc32c7d 100644 --- a/vortex-array/src/arrays/listview/tests/nullability.rs +++ b/vortex-array/src/arrays/listview/tests/nullability.rs @@ -48,7 +48,7 @@ fn test_nullable_listview_comprehensive() { assert!(!first.is_null()); assert_eq!( first, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into()], Nullability::Nullable, @@ -62,7 +62,7 @@ fn test_nullable_listview_comprehensive() { assert!(!third.is_null()); assert_eq!( third, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![5i32.into(), 6i32.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 0b2d0af547d..af21e3bc99f 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -76,7 +76,7 @@ mod tests { Nullability::Nullable, ); - xs.append_scalar(&Scalar::list( + xs.append_scalar(&Scalar::list_from_scalars( xs.element_dtype().clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::Nullable, diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index f7ac75ee978..2699fa1c6d3 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -311,7 +311,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -322,7 +322,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -348,7 +348,10 @@ mod tests { // Append multiple "empty" lists. for _ in 0..100 { builder - .append_value(Scalar::fixed_size_list(dtype.clone(), vec![], NonNullable).as_list()) + .append_value( + Scalar::fixed_size_list_from_scalars(dtype.clone(), vec![], NonNullable) + .as_list(), + ) .unwrap(); } @@ -372,7 +375,8 @@ mod tests { if i % 2 == 0 { builder .append_value( - Scalar::fixed_size_list(dtype.clone(), vec![], Nullable).as_list(), + Scalar::fixed_size_list_from_scalars(dtype.clone(), vec![], Nullable) + .as_list(), ) .unwrap(); } else { @@ -398,7 +402,7 @@ mod tests { for i in 0..5 { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![(i * 2).into(), (i * 2 + 1).into()], NonNullable, @@ -436,8 +440,12 @@ mod tests { builder .append_value( - Scalar::fixed_size_list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable) - .as_list(), + Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + Nullable, + ) + .as_list(), ) .unwrap(); @@ -445,7 +453,12 @@ mod tests { builder .append_value( - Scalar::fixed_size_list(dtype, vec![3i32.into(), 4i32.into()], Nullable).as_list(), + Scalar::fixed_size_list_from_scalars( + dtype, + vec![3i32.into(), 4i32.into()], + Nullable, + ) + .as_list(), ) .unwrap(); @@ -465,7 +478,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![ Scalar::primitive(1i32, Nullable), @@ -480,7 +493,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![ Scalar::primitive(4i32, Nullable), @@ -588,7 +601,7 @@ mod tests { // Try to append a list with wrong size. let result = builder.append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![1i32.into(), 2i32.into()], // Only 2 elements, not 3. NonNullable, @@ -694,7 +707,7 @@ mod tests { // Add some initial data. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -719,7 +732,7 @@ mod tests { // Mix of operations. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![ Scalar::primitive(1i32, Nullable), @@ -764,13 +777,19 @@ mod tests { let mut builder = FixedSizeListBuilder::with_capacity(dtype.clone(), 2, Nullable, 10); // Test appending a valid fixed-size list. - let list_scalar1 = - Scalar::fixed_size_list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); + let list_scalar1 = Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + Nullable, + ); builder.append_scalar(&list_scalar1).unwrap(); // Test appending another list. - let list_scalar2 = - Scalar::fixed_size_list(dtype.clone(), vec![3i32.into(), 4i32.into()], Nullable); + let list_scalar2 = Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![3i32.into(), 4i32.into()], + Nullable, + ); builder.append_scalar(&list_scalar2).unwrap(); // Test appending null via builder method (since fixed-size list null handling is special). @@ -820,7 +839,7 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![10i32.into(), 11i32.into(), 12i32.into()], NonNullable, @@ -836,7 +855,7 @@ mod tests { // Interleave with another list scalar. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![20i32.into(), 21i32.into(), 22i32.into()], NonNullable, diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index aaabacd1c22..7dc804e40cc 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -342,7 +342,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -353,7 +353,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -390,7 +390,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -405,7 +405,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -519,11 +519,12 @@ mod tests { let mut builder = ListBuilder::::with_capacity(dtype.clone(), Nullable, 20, 10); // Test appending a valid list. - let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); + let list_scalar1 = + Scalar::list_from_scalars(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); builder.append_scalar(&list_scalar1).unwrap(); // Test appending another list. - let list_scalar2 = Scalar::list( + let list_scalar2 = Scalar::list_from_scalars( dtype.clone(), vec![3i32.into(), 4i32.into(), 5i32.into()], Nullable, @@ -583,8 +584,12 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable) - .as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![10i32.into(), 11i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index ba1199a02ad..96af5037ba4 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -465,7 +465,7 @@ mod tests { // Append a regular list. builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -485,7 +485,8 @@ mod tests { // Append another regular list. builder .append_value( - Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(), + Scalar::list_from_scalars(dtype, vec![4i32.into(), 5i32.into()], NonNullable) + .as_list(), ) .unwrap(); @@ -520,13 +521,18 @@ mod tests { builder .append_value( - Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![3i32.into(), 4i32.into(), 5i32.into()], NonNullable, @@ -558,7 +564,8 @@ mod tests { for i in 0..5 { builder2 .append_value( - Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(), + Scalar::list_from_scalars(dtype2.clone(), vec![(i * 10).into()], NonNullable) + .as_list(), ) .unwrap(); } @@ -591,7 +598,8 @@ mod tests { assert_eq!(builder.len(), 4); // Test append_scalar. - let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable); + let list_scalar = + Scalar::list_from_scalars(dtype, vec![10i32.into(), 20i32.into()], Nullable); builder.append_scalar(&list_scalar).unwrap(); assert_eq!(builder.len(), 5); @@ -628,7 +636,9 @@ mod tests { // Add initial data. builder - .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list()) + .append_value( + Scalar::list_from_scalars(dtype, vec![0i32.into()], NonNullable).as_list(), + ) .unwrap(); // Extend from the ListArray. @@ -683,8 +693,12 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable) - .as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![10i32.into(), 11i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index ec46ca8879d..0b1560aea1d 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -608,7 +608,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { _ => Scalar::default_value(element_dtype.as_ref()), }) .collect(); - Scalar::list(element_dtype.clone(), elements, *n) + Scalar::list_from_scalars(element_dtype.clone(), elements, *n) } DType::FixedSizeList(element_dtype, size, n) => { // Create fixed-size list scalars. @@ -620,7 +620,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { _ => Scalar::default_value(element_dtype.as_ref()), }) .collect(); - Scalar::fixed_size_list(element_dtype.clone(), elements, *n) + Scalar::fixed_size_list_from_scalars(element_dtype.clone(), elements, *n) } DType::Extension(ext_dtype) => { // Create extension scalars with storage values. diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index bce11e27e0f..e40edcefcb8 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -431,7 +431,7 @@ mod tests { #[should_panic(expected = "list scalar conversion")] fn test_list_scalar_to_arrow_todo() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), diff --git a/vortex-array/src/scalar/constructor.rs b/vortex-array/src/scalar/constructor.rs index 97039cdc6a0..7095bd09006 100644 --- a/vortex-array/src/scalar/constructor.rs +++ b/vortex-array/src/scalar/constructor.rs @@ -131,43 +131,45 @@ impl Scalar { unsafe { Scalar::new_unchecked(parent_dtype, Some(scalar_value)) } } - // TODO(connor): These methods need to be deprecated!!! - /// Creates a new list scalar with the given element type and children. /// + /// Callers should prefer to use the faster `Scalar::list_array` constructor when possible. + /// /// # Panics /// /// Panics if any child scalar has a different type than the element type, or if there are too /// many children. - pub fn list( + pub fn list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, ) -> Self { - Self::create_list(element_dtype, children, nullability, ListKind::Variable) + Self::create_list_from_scalars(element_dtype, children, nullability, ListKind::Variable) } /// Creates a new empty list scalar with the given element type. pub fn list_empty(element_dtype: Arc, nullability: Nullability) -> Self { - Self::create_list(element_dtype, vec![], nullability, ListKind::Variable) + Self::create_list_from_scalars(element_dtype, vec![], nullability, ListKind::Variable) } /// Creates a new fixed-size list scalar with the given element type and children. /// + /// Callers should prefer to use the faster `Scalar::list_array` constructor when possible. + /// /// # Panics /// /// Panics if any child scalar has a different type than the element type, or if there are too /// many children. - pub fn fixed_size_list( + pub fn fixed_size_list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, ) -> Self { - Self::create_list(element_dtype, children, nullability, ListKind::FixedSize) + Self::create_list_from_scalars(element_dtype, children, nullability, ListKind::FixedSize) } /// Creates a list [`Scalar`] from an element dtype, children, nullability, and list kind. - fn create_list( + fn create_list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index 4bd7dbdcd4a..d4f875449a9 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -652,7 +652,7 @@ mod tests { #[test] fn test_list() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); - round_trip(Scalar::list( + round_trip(Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(42i32, Nullability::Nullable), diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 106ef6e5f62..bccff22fd27 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -203,7 +203,7 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::F16, Nullability::NonNullable)); - let scalar = Scalar::list( + let scalar = Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(f16_value1, Nullability::NonNullable), diff --git a/vortex-array/src/scalar/tests/nested.rs b/vortex-array/src/scalar/tests/nested.rs index a8260e2d47a..3db4071571f 100644 --- a/vortex-array/src/scalar/tests/nested.rs +++ b/vortex-array/src/scalar/tests/nested.rs @@ -23,7 +23,7 @@ mod tests { )); // Create inner FixedSizeLists. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -33,7 +33,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(4i32, Nullability::NonNullable), @@ -44,7 +44,7 @@ mod tests { ); // Create outer FixedSizeList. - let outer_list = Scalar::fixed_size_list( + let outer_list = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -89,7 +89,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -98,7 +98,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(30i32, Nullability::NonNullable), @@ -108,7 +108,7 @@ mod tests { Nullability::NonNullable, ); - let outer_fixed_list = Scalar::fixed_size_list( + let outer_fixed_list = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -136,7 +136,7 @@ mod tests { Nullability::NonNullable, )); - let fixed_list1 = Scalar::fixed_size_list( + let fixed_list1 = Scalar::fixed_size_list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -146,7 +146,7 @@ mod tests { Nullability::NonNullable, ); - let fixed_list2 = Scalar::fixed_size_list( + let fixed_list2 = Scalar::fixed_size_list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(4i32, Nullability::NonNullable), @@ -156,7 +156,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![fixed_list1, fixed_list2], Nullability::NonNullable, @@ -200,7 +200,7 @@ mod tests { ], ); - let fixed_list_of_structs = Scalar::fixed_size_list( + let fixed_list_of_structs = Scalar::fixed_size_list_from_scalars( Arc::new(struct_dtype), vec![struct1, struct2], Nullability::NonNullable, @@ -253,7 +253,7 @@ mod tests { Nullability::NonNullable, ); - let fixed_list_field = Scalar::fixed_size_list( + let fixed_list_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(10i64, Nullability::NonNullable), @@ -304,7 +304,7 @@ mod tests { #[test] fn test_list_cast_element_types() { // Test casting list elements between different primitive types. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![ Scalar::primitive(6u16, Nullability::Nullable), @@ -341,7 +341,7 @@ mod tests { #[test] fn test_list_cast_element_overflow() { // Test that casting U16 values too large for U8 fails. - let list_with_large_values = Scalar::list( + let list_with_large_values = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![ Scalar::primitive(100u16, Nullability::Nullable), @@ -369,7 +369,7 @@ mod tests { #[test] fn test_list_cast_nested_lists() { // Create a list of lists. - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -377,7 +377,7 @@ mod tests { ], Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -385,7 +385,7 @@ mod tests { ], Nullability::NonNullable, ); - let nested_list = Scalar::list( + let nested_list = Scalar::list_from_scalars( Arc::from(DType::List( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), Nullability::NonNullable, @@ -409,7 +409,7 @@ mod tests { #[test] fn test_list_cast_empty_list() { // Test casting empty list. - let empty_list = Scalar::list( + let empty_list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -437,7 +437,7 @@ mod tests { #[test] fn test_list_cast_string_elements() { // Create a list of strings. - let string_list = Scalar::list( + let string_list = Scalar::list_from_scalars( Arc::from(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("hello", Nullability::NonNullable), @@ -479,7 +479,7 @@ mod tests { Scalar::primitive(20i64, Nullability::NonNullable), ], ); - let struct_list = Scalar::list( + let struct_list = Scalar::list_from_scalars( Arc::from(struct_dtype), vec![struct1, struct2], Nullability::NonNullable, @@ -501,7 +501,7 @@ mod tests { #[test] fn test_list_cast_incompatible_element_types() { // Create a list of integers. - let int_list = Scalar::list( + let int_list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![Scalar::primitive(1i32, Nullability::NonNullable)], Nullability::NonNullable, @@ -518,7 +518,7 @@ mod tests { #[test] fn test_list_to_fixed_size_list_cast() { // Create a list with 3 elements. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -542,7 +542,7 @@ mod tests { #[test] fn test_fixed_size_list_to_list_cast() { // Create a FixedSizeList with 2 elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -564,7 +564,7 @@ mod tests { #[test] fn test_fixed_size_list_to_fixed_size_list_cast() { // Create a FixedSizeList[4] with I32 elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -588,7 +588,7 @@ mod tests { #[test] fn test_fixed_size_list_size_mismatch_error() { // Create a list with 2 elements. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -639,7 +639,7 @@ mod tests { )); // Create inner FixedSizeList for struct field. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(100i64, Nullability::NonNullable), @@ -648,7 +648,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(300i64, Nullability::NonNullable), @@ -675,14 +675,14 @@ mod tests { ); // Create middle FixedSizeList[2] of structs. - let middle_list = Scalar::fixed_size_list( + let middle_list = Scalar::fixed_size_list_from_scalars( struct_dtype, vec![struct1, struct2], Nullability::NonNullable, ); // Create outer List. - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( middle_fixed_list_dtype, vec![middle_list.clone(), middle_list], Nullability::NonNullable, @@ -731,7 +731,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -740,7 +740,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -749,7 +749,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -798,7 +798,7 @@ mod tests { )); // Create innermost FixedSizeLists. - let inner_fixed1 = Scalar::fixed_size_list( + let inner_fixed1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -807,7 +807,7 @@ mod tests { Nullability::NonNullable, ); - let inner_fixed2 = Scalar::fixed_size_list( + let inner_fixed2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -817,20 +817,20 @@ mod tests { ); // Create middle Lists. - let middle_list1 = Scalar::list( + let middle_list1 = Scalar::list_from_scalars( innermost_dtype.clone(), vec![inner_fixed1.clone()], Nullability::NonNullable, ); - let middle_list2 = Scalar::list( + let middle_list2 = Scalar::list_from_scalars( innermost_dtype, vec![inner_fixed2, inner_fixed1], Nullability::NonNullable, ); // Create outer FixedSizeList. - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( middle_dtype, vec![middle_list1, middle_list2], Nullability::NonNullable, @@ -855,7 +855,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -864,7 +864,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -873,7 +873,7 @@ mod tests { Nullability::NonNullable, ); - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -893,7 +893,8 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let fixed_list = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable); + let fixed_list = + Scalar::fixed_size_list_from_scalars(element_dtype, children, Nullability::NonNullable); assert!(matches!(fixed_list.dtype(), DType::FixedSizeList(_, 3, _))); @@ -920,8 +921,11 @@ mod tests { fn test_fixed_size_list_size_zero() { // Test FixedSizeList[0] behavior. let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let empty_fixed_list = - Scalar::fixed_size_list(element_dtype.clone(), vec![], Nullability::NonNullable); + let empty_fixed_list = Scalar::fixed_size_list_from_scalars( + element_dtype.clone(), + vec![], + Nullability::NonNullable, + ); assert!(matches!( empty_fixed_list.dtype(), @@ -942,7 +946,7 @@ mod tests { #[test] fn test_fixed_size_list_cast_to_list() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -978,7 +982,7 @@ mod tests { #[test] fn test_list_cast_to_fixed_size_list() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list = Scalar::list( + let list = Scalar::list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -1015,7 +1019,7 @@ mod tests { #[test] fn test_fixed_size_list_size_validation() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list = Scalar::list( + let list = Scalar::list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1045,7 +1049,7 @@ mod tests { #[test] fn test_fixed_size_list_display() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(100i32, Nullability::NonNullable), @@ -1065,7 +1069,7 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); // Create two equal fixed size lists. - let list1 = Scalar::fixed_size_list( + let list1 = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1074,7 +1078,7 @@ mod tests { Nullability::NonNullable, ); - let list2 = Scalar::fixed_size_list( + let list2 = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1086,7 +1090,7 @@ mod tests { assert_eq!(list1, list2); // Create a different list. - let list3 = Scalar::fixed_size_list( + let list3 = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1106,7 +1110,7 @@ mod tests { fn test_fixed_size_list_single_element() { // Test FixedSizeList[1]. let element_dtype = Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)); - let single_element = Scalar::fixed_size_list( + let single_element = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![Scalar::primitive(42i64, Nullability::NonNullable)], Nullability::NonNullable, @@ -1133,7 +1137,7 @@ mod tests { #[test] fn test_fixed_size_list_with_strings() { let element_dtype = Arc::new(DType::Utf8(Nullability::NonNullable)); - let string_list = Scalar::fixed_size_list( + let string_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::utf8("hello", Nullability::NonNullable), @@ -1165,7 +1169,7 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); // FixedSizeList[3] of i32 = 3 * 4 bytes = 12 bytes. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1177,11 +1181,12 @@ mod tests { assert_eq!(fixed_list.approx_nbytes(), 12); // Empty FixedSizeList[0] = 0 bytes. - let empty_list = Scalar::fixed_size_list(element_dtype, vec![], Nullability::NonNullable); + let empty_list = + Scalar::fixed_size_list_from_scalars(element_dtype, vec![], Nullability::NonNullable); assert_eq!(empty_list.approx_nbytes(), 0); // FixedSizeList with strings. - let string_list = Scalar::fixed_size_list( + let string_list = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("ab", Nullability::NonNullable), // 2 bytes @@ -1195,7 +1200,7 @@ mod tests { #[test] fn test_fixed_size_list_casting_with_nullability() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -1233,7 +1238,7 @@ mod tests { #[test] fn test_list_basic_operations() { // Create a simple list. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1251,7 +1256,7 @@ mod tests { #[test] fn test_list_of_lists() { // Create nested lists without FixedSizeList. - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("a", Nullability::NonNullable), @@ -1260,7 +1265,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("c", Nullability::NonNullable), @@ -1270,7 +1275,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::Utf8(Nullability::NonNullable)), Nullability::NonNullable, @@ -1381,7 +1386,7 @@ mod tests { ], ); - let list_of_structs = Scalar::list( + let list_of_structs = Scalar::list_from_scalars( Arc::new(struct_dtype), vec![struct1, struct2, struct3], Nullability::NonNullable, @@ -1421,7 +1426,7 @@ mod tests { Nullability::NonNullable, ); - let tags_list = Scalar::list( + let tags_list = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("rust", Nullability::NonNullable), @@ -1468,7 +1473,7 @@ mod tests { #[test] fn test_deeply_nested_lists_only() { // Create a 3-level nested list structure without any FixedSizeList. - let level3 = Scalar::list( + let level3 = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1477,7 +1482,7 @@ mod tests { Nullability::NonNullable, ); - let level2 = Scalar::list( + let level2 = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), Nullability::NonNullable, @@ -1486,7 +1491,7 @@ mod tests { Nullability::NonNullable, ); - let level1 = Scalar::list( + let level1 = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::List( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), diff --git a/vortex-array/src/scalar/tests/nullability.rs b/vortex-array/src/scalar/tests/nullability.rs index 830a6c3040c..8cbcac0bb66 100644 --- a/vortex-array/src/scalar/tests/nullability.rs +++ b/vortex-array/src/scalar/tests/nullability.rs @@ -47,7 +47,7 @@ mod tests { #[test] fn test_list_cast_nullability_changes() { - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![Scalar::primitive(6u16, Nullability::Nullable)], Nullability::Nullable, @@ -86,7 +86,7 @@ mod tests { #[test] fn test_list_cast_with_null_elements() { - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![ Scalar::primitive(6u16, Nullability::Nullable), @@ -146,7 +146,7 @@ mod tests { assert_eq!(struct_with_null.approx_nbytes(), 4 + 8); // Test list with null elements - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -187,7 +187,7 @@ mod tests { #[test] fn test_list_cast_null_to_nonnull_error() { - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![ Scalar::primitive(6u16, Nullability::Nullable), @@ -208,7 +208,7 @@ mod tests { fn test_fixed_size_list_null_elements() { // Create FixedSizeList with null elements. let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -249,7 +249,7 @@ mod tests { )); // First inner list: non-null container with one null element. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -266,7 +266,7 @@ mod tests { )); // Third inner list: non-null container with non-null elements. - let inner_list3 = Scalar::fixed_size_list( + let inner_list3 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(3i32, Nullability::Nullable), @@ -275,7 +275,7 @@ mod tests { Nullability::Nullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![inner_list1, inner_list2, inner_list3], Nullability::NonNullable, @@ -341,7 +341,7 @@ mod tests { assert!(!struct_view.field("name").unwrap().is_null()); // Create struct with non-null FixedSizeList containing null elements. - let fixed_list_with_nulls = Scalar::fixed_size_list( + let fixed_list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::Nullable)), vec![ Scalar::primitive(100i64, Nullability::Nullable), @@ -370,7 +370,7 @@ mod tests { fn test_list_partial_null_elements() { // Test List with mixture of null and non-null elements. let element_dtype = Arc::new(DType::Primitive(PType::F32, Nullability::Nullable)); - let list_with_nulls = Scalar::list( + let list_with_nulls = Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(1.5f32, Nullability::Nullable), @@ -399,7 +399,7 @@ mod tests { // Test that casting null elements to non-nullable fails. // FixedSizeList with null elements can't cast to non-nullable elements. - let fixed_list_with_nulls = Scalar::fixed_size_list( + let fixed_list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -437,7 +437,7 @@ mod tests { #[test] fn test_nullable_to_nonnullable_valid_cast() { // Test that casting nullable types to non-nullable succeeds when no nulls present. - let fixed_list_no_nulls = Scalar::fixed_size_list( + let fixed_list_no_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -476,7 +476,7 @@ mod tests { assert!(null_list.is_null()); // Case 2: Non-null FixedSizeList with all null elements. - let list_all_nulls = Scalar::fixed_size_list( + let list_all_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)), @@ -491,7 +491,7 @@ mod tests { assert!(list_all_nulls.as_list().element(2).unwrap().is_null()); // Case 3: Non-null FixedSizeList with some null elements. - let list_some_nulls = Scalar::fixed_size_list( + let list_some_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -582,7 +582,7 @@ mod tests { let middle_dtype = Arc::new(DType::List(innermost_dtype.clone(), Nullability::Nullable)); // Create innermost FixedSizeLists with different null patterns. - let inner1 = Scalar::fixed_size_list( + let inner1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -597,7 +597,7 @@ mod tests { Nullability::Nullable, )); - let _inner3 = Scalar::fixed_size_list( + let _inner3 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(3i32, Nullability::Nullable), @@ -607,7 +607,7 @@ mod tests { ); // Create middle Lists. - let middle1 = Scalar::list( + let middle1 = Scalar::list_from_scalars( innermost_dtype.clone(), vec![inner1, inner2], Nullability::Nullable, @@ -616,7 +616,7 @@ mod tests { let middle2 = Scalar::null(DType::List(innermost_dtype, Nullability::Nullable)); // Create outer FixedSizeList. - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( middle_dtype, vec![middle1, middle2], Nullability::NonNullable, @@ -664,7 +664,7 @@ mod tests { assert_ne!(null_list1, null_list3); // Non-null list with null elements vs null list. - let list_with_nulls = Scalar::fixed_size_list( + let list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)), @@ -713,7 +713,7 @@ mod tests { ); // Create struct with mixed null patterns. - let fixed_list_field = Scalar::fixed_size_list( + let fixed_list_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -727,7 +727,7 @@ mod tests { Nullability::Nullable, )); - let inner_fixed1 = Scalar::fixed_size_list( + let inner_fixed1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U8, Nullability::Nullable)), vec![ Scalar::primitive(1u8, Nullability::Nullable), @@ -742,7 +742,7 @@ mod tests { Nullability::Nullable, )); - let nested_fixed_field = Scalar::fixed_size_list( + let nested_fixed_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::FixedSizeList( Arc::new(DType::Primitive(PType::U8, Nullability::Nullable)), 2, @@ -776,7 +776,7 @@ mod tests { #[test] fn test_casting_preserves_null_positions() { // Ensure that casting preserves the positions of null elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), diff --git a/vortex-array/src/scalar/tests/primitives.rs b/vortex-array/src/scalar/tests/primitives.rs index d2e32ce6ef0..1eac66ffee0 100644 --- a/vortex-array/src/scalar/tests/primitives.rs +++ b/vortex-array/src/scalar/tests/primitives.rs @@ -116,7 +116,7 @@ mod tests { assert_eq!(struct_scalar.approx_nbytes(), 4 + 8); // i32 + i64 // Test list scalar - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -245,7 +245,7 @@ mod tests { assert_eq!(struct_with_null.approx_nbytes(), 4 + 8); // Test list with null elements - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), diff --git a/vortex-array/src/scalar/tests/round_trip.rs b/vortex-array/src/scalar/tests/round_trip.rs index c904a27f2ae..6eb0ca78389 100644 --- a/vortex-array/src/scalar/tests/round_trip.rs +++ b/vortex-array/src/scalar/tests/round_trip.rs @@ -156,7 +156,8 @@ mod tests { Scalar::primitive(2i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children.clone(), Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children.clone(), Nullability::NonNullable); // Extract as ListScalar let list_specialized = list_scalar.as_list(); @@ -209,7 +210,7 @@ mod tests { #[test] fn test_protobuf_edge_cases() { // Test empty list - let empty_list = Scalar::list( + let empty_list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -222,7 +223,7 @@ mod tests { let inner_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); let outer_dtype = Arc::new(DType::List(inner_dtype.clone(), Nullability::NonNullable)); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -231,7 +232,8 @@ mod tests { Nullability::NonNullable, ); - let nested_list = Scalar::list(outer_dtype, vec![inner_list1], Nullability::NonNullable); + let nested_list = + Scalar::list_from_scalars(outer_dtype, vec![inner_list1], Nullability::NonNullable); let pb_nested = pb::Scalar::from(&nested_list); let round_tripped_nested = Scalar::from_proto(&pb_nested, &SESSION).unwrap(); diff --git a/vortex-array/src/scalar/typed_view/list.rs b/vortex-array/src/scalar/typed_view/list.rs index 8e8810ca3de..d9cdda751ce 100644 --- a/vortex-array/src/scalar/typed_view/list.rs +++ b/vortex-array/src/scalar/typed_view/list.rs @@ -304,7 +304,8 @@ mod tests { Scalar::primitive(2i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); assert_eq!(list.len(), 3); @@ -341,7 +342,8 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); @@ -366,7 +368,8 @@ mod tests { Scalar::primitive(100i32, Nullability::NonNullable), Scalar::primitive(200i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); let elements = list.elements().unwrap(); @@ -389,7 +392,8 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); let display = format!("{list}"); @@ -404,13 +408,15 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![ Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -425,13 +431,15 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![ Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -444,10 +452,12 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); let children1 = vec![Scalar::primitive(1i32, Nullability::NonNullable)]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![Scalar::primitive(2i32, Nullability::NonNullable)]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -461,10 +471,12 @@ mod tests { let element_dtype2 = Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)); let children1 = vec![Scalar::primitive(1i32, Nullability::NonNullable)]; - let list_scalar1 = Scalar::list(element_dtype1, children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype1, children1, Nullability::NonNullable); let children2 = vec![Scalar::primitive(1i64, Nullability::NonNullable)]; - let list_scalar2 = Scalar::list(element_dtype2, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype2, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -483,7 +495,8 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); @@ -506,7 +519,8 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let vec: Vec = Vec::try_from(&list_scalar).unwrap(); assert_eq!(vec, vec![10, 20, 30]); @@ -528,7 +542,8 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); @@ -553,7 +568,7 @@ mod tests { let children = vec![ Scalar::primitive(1i64, Nullability::NonNullable), // Wrong type! ]; - Scalar::list(element_dtype, children, Nullability::NonNullable); + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); } #[test] @@ -569,7 +584,8 @@ mod tests { Scalar::utf8("hello".to_string(), Nullability::NonNullable), Scalar::utf8("world".to_string(), Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); assert_eq!(list.len(), 2); @@ -589,7 +605,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -598,7 +614,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -607,7 +623,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_list_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index 3730b13cfe8..8b79d385e23 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -397,7 +397,7 @@ mod tests { ) .unwrap(); - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![3i32.into(), 4i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/scalar_fn/fns/list_contains/mod.rs b/vortex-array/src/scalar_fn/fns/list_contains/mod.rs index 4bc17f9f89a..c37f46a02c6 100644 --- a/vortex-array/src/scalar_fn/fns/list_contains/mod.rs +++ b/vortex-array/src/scalar_fn/fns/list_contains/mod.rs @@ -612,7 +612,7 @@ mod tests { #[test] pub fn list_falsification() { let expr = list_contains( - lit(Scalar::list( + lit(Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -663,7 +663,7 @@ mod tests { let arr = test_array(); // Both list and needle are constants - should use scalar optimization - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -786,7 +786,7 @@ mod tests { #[test] fn test_constant_list() { let list_array = ConstantArray::new( - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, @@ -826,7 +826,7 @@ mod tests { #[test] fn test_list_array_element() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 3.into(), 6.into()], Nullability::NonNullable, diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index b0380db5cb3..6f23f1beeb1 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -221,7 +221,7 @@ impl ExpressionConvertor for DefaultExpressionConvertor { }) .try_collect()?; - let list = Scalar::list( + let list = Scalar::list_from_scalars( list_elements[0].dtype().clone(), list_elements, Nullability::Nullable, diff --git a/vortex-duckdb/src/convert/expr.rs b/vortex-duckdb/src/convert/expr.rs index c2d5a37813d..a718c2d52f1 100644 --- a/vortex-duckdb/src/convert/expr.rs +++ b/vortex-duckdb/src/convert/expr.rs @@ -139,7 +139,7 @@ pub fn try_from_bound_expression( else { return Ok(None); }; - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(list_elements[0].dtype().clone()), list_elements, Nullability::Nullable, diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index 97f788dd39d..8cdac87c5d2 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -321,7 +321,7 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar { Nullable, )), ExtractedValue::List(vs) => match dtype { - DType::List(c, _) => Ok(Scalar::list( + DType::List(c, _) => Ok(Scalar::list_from_scalars( c, vs.into_iter() .map(Scalar::try_from) diff --git a/vortex-duckdb/src/convert/table_filter.rs b/vortex-duckdb/src/convert/table_filter.rs index 702cdef695e..75d79b25c16 100644 --- a/vortex-duckdb/src/convert/table_filter.rs +++ b/vortex-duckdb/src/convert/table_filter.rs @@ -81,7 +81,8 @@ pub fn try_from_table_filter( "IN filter must have at least one value" ); let dtype = scalars[0].dtype().clone(); - let list_scalar = Scalar::list(Arc::new(dtype), scalars, Nullability::Nullable); + let list_scalar = + Scalar::list_from_scalars(Arc::new(dtype), scalars, Nullability::Nullable); list_contains(lit(list_scalar), col.clone()) } TableFilterClass::Dynamic(dynamic) => { diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index d4a0e000cf1..ace38abf9f7 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -183,7 +183,7 @@ fn scalar_helper_inner(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyRes .iter() .map(|e| scalar_helper_inner(&e, Some(element_dtype))) .try_collect()?; - Scalar::list(element_dtype.clone(), elements, Nullability::NonNullable); + Scalar::list_from_scalars(element_dtype.clone(), elements, Nullability::NonNullable); } else { // If no dtype was provided, we need to infer the element dtype from the list contents. // We do this in a greedy way taking the first element dtype we find. @@ -198,7 +198,7 @@ fn scalar_helper_inner(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyRes elements.push(scalar); } - return Ok(Scalar::list( + return Ok(Scalar::list_from_scalars( element_dtype .map(Arc::new) // Empty list defaults to Null dtype From 7f11f0c370039746702f0b148333566758301fd7 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 4 Mar 2026 13:14:36 -0500 Subject: [PATCH 5/5] fix builders Signed-off-by: Connor Tsui --- vortex-array/src/builders/fixed_size_list.rs | 20 ++------- vortex-array/src/builders/list.rs | 30 +++----------- vortex-array/src/builders/listview.rs | 43 +++----------------- 3 files changed, 15 insertions(+), 78 deletions(-) diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index 2699fa1c6d3..910064de05b 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -108,10 +108,10 @@ impl FixedSizeListBuilder { /// /// [`ListArray`]: crate::arrays::ListArray pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - if value.is_null() { + let Some(array) = value.as_array() else { self.append_null(); return Ok(()); - } + }; if value.len() != self.list_size() as usize { vortex_bail!( @@ -122,21 +122,7 @@ impl FixedSizeListBuilder { ); } - // Fast-path: if the list scalar is backed by an array, append the entire thing directly. - if let Some(array) = value.as_array() { - return self.append_array_as_list(array); - } - - // Otherwise we must append the individual scalars. - let elements = value - .elements() - .vortex_expect("non-null ListScalar must have elements"); - for scalar in elements { - self.elements_builder.append_scalar(&scalar)?; - } - self.nulls.append_non_null(); - - Ok(()) + self.append_array_as_list(array) } /// Finishes the builder directly into a [`FixedSizeListArray`]. diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index 7dc804e40cc..09e6ed51319 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -110,31 +110,13 @@ impl ListBuilder { /// Appends a list `value` to the builder. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - if value.is_null() { - self.append_null(); - return Ok(()); - } - - // Fast-path: if the list scalar is backed by an array, append the entire thing directly. - if let Some(array) = value.as_array() { - return self.append_array_as_list(array); - } - - // Otherwise, we must append the individual scalars. - let elements = value - .elements() - .vortex_expect("non-null ListScalar must have elements"); - for scalar in elements { - self.elements_builder.append_scalar(&scalar)?; + match value.as_array() { + Some(array) => self.append_array_as_list(array), + None => { + self.append_null(); + Ok(()) + } } - - self.nulls.append_non_null(); - self.offsets_builder.append_value( - O::from_usize(self.elements_builder.len()) - .vortex_expect("Failed to convert from usize to O"), - ); - - Ok(()) } /// Finishes the builder directly into a [`ListArray`]. diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index 96af5037ba4..15bead3225a 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -152,44 +152,13 @@ impl ListViewBuilder { /// This method extends the value builder with the provided values and records /// the offset and size of the new list. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - if value.is_null() { - self.append_null(); - return Ok(()); - } - - // Fast-path: if the list scalar is backed by an array, append the entire thing directly. - if let Some(array) = value.as_array() { - return self.append_array_as_list(array); - } - - // Otherwise, we must append the individual scalars. - let elements = value - .elements() - .vortex_expect("non-null ListScalar must have elements"); - - let curr_offset = self.elements_builder.len(); - let num_elements = elements.len(); - - // We must assert this even in release mode to ensure that the safety comment in - // `finish_into_listview` is correct. - assert!( - ((curr_offset + num_elements) as u64) < O::max_value_as_u64(), - "appending this list would cause an offset overflow" - ); - - for scalar in elements { - self.elements_builder.append_scalar(&scalar)?; + match value.as_array() { + Some(array) => self.append_array_as_list(array), + None => { + self.append_null(); + Ok(()) + } } - self.nulls.append_non_null(); - - self.offsets_builder.append_value( - O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"), - ); - self.sizes_builder.append_value( - S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"), - ); - - Ok(()) } /// Finishes the builder directly into a [`ListViewArray`].