diff --git a/encodings/sequence/src/compute/list_contains.rs b/encodings/sequence/src/compute/list_contains.rs index bd4d29aa04b..06daf1e4dbb 100644 --- a/encodings/sequence/src/compute/list_contains.rs +++ b/encodings/sequence/src/compute/list_contains.rs @@ -63,7 +63,7 @@ mod tests { #[test] fn test_list_contains_seq() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 3.into()], Nullability::Nullable, diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index 7711719c3b0..2a36c80b256 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -1241,7 +1241,7 @@ mod test { .into_array(); let indices = buffer![0u8, 2u8, 4u8].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(99i32, NonNullable), @@ -1279,7 +1279,7 @@ mod test { .into_array(); let indices = buffer![1u16, 3u16, 4u16].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(7i32, NonNullable), @@ -1326,7 +1326,7 @@ mod test { let indices = buffer![5u32, 50, 95].into_array(); // Fill value [99, 99] will appear 97 times but stored only once. - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(99i32, NonNullable), @@ -1384,7 +1384,7 @@ mod test { .into_array(); let indices = buffer![0u32].into_array(); - let fill_value = Scalar::fixed_size_list( + let fill_value = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, NonNullable)), vec![ Scalar::primitive(1i32, NonNullable), diff --git a/fuzz/src/array/scalar_at.rs b/fuzz/src/array/scalar_at.rs index aa806d56596..5d3704e35b5 100644 --- a/fuzz/src/array/scalar_at.rs +++ b/fuzz/src/array/scalar_at.rs @@ -54,7 +54,7 @@ pub fn scalar_at_canonical_array(canonical: Canonical, index: usize) -> VortexRe .vortex_expect("scalar_at_canonical_array should succeed in fuzz test") }) .collect(); - Scalar::list( + Scalar::list_from_scalars( Arc::new(list.dtype().clone()), children, array.dtype().nullability(), @@ -72,7 +72,11 @@ pub fn scalar_at_canonical_array(canonical: Canonical, index: usize) -> VortexRe .vortex_expect("scalar_at_canonical_array should succeed in fuzz test") }) .collect(); - Scalar::fixed_size_list(list.dtype().clone(), children, array.dtype().nullability()) + Scalar::fixed_size_list_from_scalars( + list.dtype().clone(), + children, + array.dtype().nullability(), + ) } Canonical::Struct(array) => { let field_scalars: Vec = array diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index e4a72149f27..4e204f74fbd 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -5596,6 +5596,8 @@ pub fn vortex_array::builders::PrimitiveBuilder::set_validity(&mut self, vali pub unsafe fn vortex_array::builders::PrimitiveBuilder::set_validity_unchecked(&mut self, validity: vortex_mask::Mask) +pub fn vortex_array::builders::build_array_from_scalars(dtype: &vortex_array::dtype::DType, scalars: &[vortex_array::scalar::Scalar]) -> vortex_array::ArrayRef + pub fn vortex_array::builders::builder_with_capacity(dtype: &vortex_array::dtype::DType, capacity: usize) -> alloc::boxed::Box pub mod vortex_array::builtins @@ -11502,16 +11504,18 @@ pub fn vortex_array::variants::PrimitiveTyped<'_>::index_lt(&self, idx: usize, e pub enum vortex_array::scalar::ScalarValue +pub vortex_array::scalar::ScalarValue::Array(vortex_array::ArrayRef) + pub vortex_array::scalar::ScalarValue::Binary(vortex_buffer::ByteBuffer) pub vortex_array::scalar::ScalarValue::Bool(bool) pub vortex_array::scalar::ScalarValue::Decimal(vortex_array::scalar::DecimalValue) -pub vortex_array::scalar::ScalarValue::List(alloc::vec::Vec>) - pub vortex_array::scalar::ScalarValue::Primitive(vortex_array::scalar::PValue) +pub vortex_array::scalar::ScalarValue::Struct(alloc::vec::Vec>) + pub vortex_array::scalar::ScalarValue::Utf8(vortex_buffer::string::BufferString) impl vortex_array::scalar::ScalarValue @@ -11522,10 +11526,12 @@ pub fn vortex_array::scalar::ScalarValue::as_bool(&self) -> bool pub fn vortex_array::scalar::ScalarValue::as_decimal(&self) -> &vortex_array::scalar::DecimalValue -pub fn vortex_array::scalar::ScalarValue::as_list(&self) -> &[core::option::Option] +pub fn vortex_array::scalar::ScalarValue::as_list(&self) -> &vortex_array::ArrayRef pub fn vortex_array::scalar::ScalarValue::as_primitive(&self) -> &vortex_array::scalar::PValue +pub fn vortex_array::scalar::ScalarValue::as_struct_fields(&self) -> &[core::option::Option] + pub fn vortex_array::scalar::ScalarValue::as_utf8(&self) -> &vortex_buffer::string::BufferString pub fn vortex_array::scalar::ScalarValue::into_binary(self) -> vortex_buffer::ByteBuffer @@ -11534,10 +11540,12 @@ pub fn vortex_array::scalar::ScalarValue::into_bool(self) -> bool pub fn vortex_array::scalar::ScalarValue::into_decimal(self) -> vortex_array::scalar::DecimalValue -pub fn vortex_array::scalar::ScalarValue::into_list(self) -> alloc::vec::Vec> +pub fn vortex_array::scalar::ScalarValue::into_list(self) -> vortex_array::ArrayRef pub fn vortex_array::scalar::ScalarValue::into_primitive(self) -> vortex_array::scalar::PValue +pub fn vortex_array::scalar::ScalarValue::into_struct_fields(self) -> alloc::vec::Vec> + pub fn vortex_array::scalar::ScalarValue::into_utf8(self) -> vortex_buffer::string::BufferString impl vortex_array::scalar::ScalarValue @@ -11560,7 +11568,7 @@ impl core::cmp::Eq for vortex_array::scalar::ScalarValue impl core::cmp::PartialEq for vortex_array::scalar::ScalarValue -pub fn vortex_array::scalar::ScalarValue::eq(&self, other: &vortex_array::scalar::ScalarValue) -> bool +pub fn vortex_array::scalar::ScalarValue::eq(&self, other: &Self) -> bool impl core::cmp::PartialOrd for vortex_array::scalar::ScalarValue @@ -11732,9 +11740,7 @@ pub fn vortex_array::scalar::ScalarValue::fmt(&self, f: &mut core::fmt::Formatte impl core::hash::Hash for vortex_array::scalar::ScalarValue -pub fn vortex_array::scalar::ScalarValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) - -impl core::marker::StructuralPartialEq for vortex_array::scalar::ScalarValue +pub fn vortex_array::scalar::ScalarValue::hash(&self, state: &mut H) impl core::convert::From> for vortex_array::scalar::ScalarValue where T: vortex_array::dtype::NativeDType, vortex_array::scalar::Scalar: core::convert::From @@ -12010,6 +12016,8 @@ pub struct vortex_array::scalar::ListScalar<'a> impl<'a> vortex_array::scalar::ListScalar<'a> +pub fn vortex_array::scalar::ListScalar<'a>::as_array(&self) -> core::option::Option<&'a vortex_array::ArrayRef> + pub fn vortex_array::scalar::ListScalar<'a>::dtype(&self) -> &'a vortex_array::dtype::DType pub fn vortex_array::scalar::ListScalar<'a>::element(&self, idx: usize) -> core::option::Option @@ -12226,12 +12234,14 @@ pub fn vortex_array::scalar::Scalar::extension Self -pub fn vortex_array::scalar::Scalar::fixed_size_list(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::fixed_size_list_from_scalars(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self -pub fn vortex_array::scalar::Scalar::list(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::list_array(parent_dtype: vortex_array::dtype::DType, elem_list: vortex_array::ArrayRef) -> Self pub fn vortex_array::scalar::Scalar::list_empty(element_dtype: alloc::sync::Arc, nullability: vortex_array::dtype::Nullability) -> Self +pub fn vortex_array::scalar::Scalar::list_from_scalars(element_dtype: impl core::convert::Into>, children: alloc::vec::Vec, nullability: vortex_array::dtype::Nullability) -> Self + pub fn vortex_array::scalar::Scalar::primitive>(value: T, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::scalar::Scalar::primitive_value(value: vortex_array::scalar::PValue, ptype: vortex_array::dtype::PType, nullability: vortex_array::dtype::Nullability) -> Self @@ -15802,6 +15812,12 @@ impl core::clone::Clone for vortex_array::serde::ArrayParts pub fn vortex_array::serde::ArrayParts::clone(&self) -> vortex_array::serde::ArrayParts +impl core::convert::TryFrom<&[u8]> for vortex_array::serde::ArrayParts + +pub type vortex_array::serde::ArrayParts::Error = vortex_error::VortexError + +pub fn vortex_array::serde::ArrayParts::try_from(value: &[u8]) -> core::result::Result + impl core::convert::TryFrom for vortex_array::serde::ArrayParts pub type vortex_array::serde::ArrayParts::Error = vortex_error::VortexError diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index 711f0c0a65f..dab78a9d58e 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -26,6 +26,7 @@ use crate::builders::ArrayBuilder; use crate::builders::DecimalBuilder; use crate::builders::FixedSizeListBuilder; use crate::builders::ListViewBuilder; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::dtype::NativePType; @@ -33,6 +34,7 @@ use crate::dtype::Nullability; use crate::dtype::PType; use crate::match_each_decimal_value_type; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::scalar::arbitrary::random_scalar; use crate::validity::Validity; @@ -247,12 +249,18 @@ fn random_list_scalar( u: &mut Unstructured, elem_dtype: &Arc, list_size: u32, - null: Nullability, + nullability: Nullability, ) -> Result { let elems = (0..list_size) .map(|_| random_scalar(u, elem_dtype)) .collect::>>()?; - Ok(Scalar::list(elem_dtype.clone(), elems, null)) + let array = build_array_from_scalars(elem_dtype, &elems); + + Scalar::try_new( + DType::List(elem_dtype.clone(), nullability), + Some(ScalarValue::Array(array)), + ) + .map_err(|_| arbitrary::Error::IncorrectFormat) } fn random_string( diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index af8c5ee8e4f..c6e0aa304ed 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -389,7 +389,7 @@ mod tests { #[test] fn test_canonicalize_lists() -> VortexResult<()> { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable)), vec![1u64.into(), 2u64.into()], Nullability::NonNullable, @@ -414,7 +414,7 @@ mod tests { #[test] fn test_canonicalize_empty_list() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U64, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -481,7 +481,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_non_null() { // Test with a non-null fixed-size list constant. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -509,7 +509,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_nullable() { // Test with a nullable but non-null fixed-size list constant. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::F64, Nullability::NonNullable)), vec![ Scalar::primitive(1.5f64, Nullability::NonNullable), @@ -558,7 +558,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_empty() { // Test with size-0 lists (edge case). - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I8, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -578,7 +578,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_nested() { // Test with nested data types (list of strings). - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![Scalar::from("hello"), Scalar::from("world")], Nullability::NonNullable, @@ -601,7 +601,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_single_element() { // Test with a single-element list. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I16, Nullability::NonNullable)), vec![Scalar::primitive(42i16, Nullability::NonNullable)], Nullability::NonNullable, @@ -620,7 +620,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_with_null_elements() { // Test FSL with nullable element type where some elements are null. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(100i32, Nullability::Nullable), @@ -661,7 +661,7 @@ mod tests { #[test] fn test_canonicalize_fixed_size_list_large() { // Test with a large constant array. - let fsl_scalar = Scalar::fixed_size_list( + let fsl_scalar = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)), vec![ Scalar::primitive(1u8, Nullability::NonNullable), diff --git a/vortex-array/src/arrays/fixed_size_list/tests/basic.rs b/vortex-array/src/arrays/fixed_size_list/tests/basic.rs index e3cdc9b2789..1cf75e2de44 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/basic.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/basic.rs @@ -68,7 +68,7 @@ fn test_scalar_at() { let first = fsl.scalar_at(0).unwrap(); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, @@ -85,7 +85,7 @@ fn test_scalar_at() { let second = fsl.scalar_at(1).unwrap(); assert_eq!( second, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![4i32.into(), 5i32.into(), 6i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs b/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs index d1b08fb0d04..8e34fdb9c64 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/degenerate.rs @@ -58,7 +58,7 @@ fn test_fsl_size_0_length_1_non_nullable() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I16.into()), vec![], Nullability::NonNullable, @@ -84,7 +84,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_first.is_null()); assert_eq!( scalar_first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -95,7 +95,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_middle.is_null()); assert_eq!( scalar_middle, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -106,7 +106,7 @@ fn test_fsl_size_0_huge_length_non_nullable() { assert!(!scalar_end.is_null()); assert_eq!( scalar_end, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I64.into()), vec![], Nullability::NonNullable, @@ -156,7 +156,11 @@ fn test_fsl_size_0_length_1_nullable_valid() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list(Arc::new(PType::U16.into()), vec![], Nullability::Nullable,) + Scalar::fixed_size_list_from_scalars( + Arc::new(PType::U16.into()), + vec![], + Nullability::Nullable, + ) ); } @@ -205,7 +209,11 @@ fn test_fsl_size_0_length_10_nullable_mixed() { assert!(!scalar.is_null()); assert_eq!( scalar, - Scalar::fixed_size_list(Arc::new(PType::F32.into()), vec![], Nullability::Nullable,) + Scalar::fixed_size_list_from_scalars( + Arc::new(PType::F32.into()), + vec![], + Nullability::Nullable, + ) ); } else { assert!(scalar.is_null()); diff --git a/vortex-array/src/arrays/fixed_size_list/tests/nested.rs b/vortex-array/src/arrays/fixed_size_list/tests/nested.rs index f49cdd7f2b0..c7377159788 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/nested.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/nested.rs @@ -282,21 +282,21 @@ fn test_fsl_of_list() { // Add 6 lists (2 FSL * 3 lists each). // First FSL: [[1,2], [3], [4,5,6]]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![3i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![4i32.into(), 5i32.into(), 6i32.into()], Nullability::NonNullable, @@ -305,21 +305,21 @@ fn test_fsl_of_list() { // Second FSL: [[7], [8,9], []]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![7i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![8i32.into(), 9i32.into()], Nullability::NonNullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -363,7 +363,7 @@ fn test_fsl_of_nullable_list() { // Add 4 lists (2 FSL * 2 lists each). // First FSL: [[1,2], null]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![1u16.into(), 2u16.into()], Nullability::Nullable, @@ -373,14 +373,14 @@ fn test_fsl_of_nullable_list() { // Second FSL: [[3], [4,5]]. list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![3u16.into()], Nullability::Nullable, )) .unwrap(); list_builder - .append_scalar(&Scalar::list( + .append_scalar(&Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::NonNullable)), vec![4u16.into(), 5u16.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs b/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs index 4cba013426c..d19149598b7 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/nullability.rs @@ -33,7 +33,7 @@ fn test_nullable_fsl_with_nulls() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![1i32.into(), 2i32.into()], Nullability::Nullable, @@ -54,7 +54,7 @@ fn test_nullable_fsl_with_nulls() { assert!(!third.is_null()); assert_eq!( third, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::I32.into()), vec![5i32.into(), 6i32.into()], Nullability::Nullable, @@ -95,7 +95,7 @@ fn test_nullable_elements_non_nullable_lists() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![Some(1i32).into(), None::.into(), Some(3i32).into(),], Nullability::NonNullable, @@ -107,7 +107,7 @@ fn test_nullable_elements_non_nullable_lists() { assert!(!second.is_null()); assert_eq!( second, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![Some(4i32).into(), Some(5i32).into(), None::.into(),], Nullability::NonNullable, @@ -133,7 +133,7 @@ fn test_nullable_elements_and_nullable_lists() { assert!(!first.is_null()); assert_eq!( first, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![Some(10u16).into(), None::.into()], Nullability::Nullable, @@ -154,7 +154,7 @@ fn test_nullable_elements_and_nullable_lists() { assert!(!third.is_null()); assert_eq!( third, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), vec![None::.into(), None::.into()], Nullability::Nullable, @@ -187,7 +187,7 @@ fn test_alternating_nulls() { let expected_value = u8::try_from(i + 1).unwrap(); assert_eq!( scalar, - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( Arc::new(PType::U8.into()), vec![expected_value.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrays/fixed_size_list/tests/take.rs b/vortex-array/src/arrays/fixed_size_list/tests/take.rs index 68ff369fe52..d04daa9d42c 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/take.rs @@ -167,7 +167,7 @@ fn test_take_nullable_arrays_fsl_specific( let scalars: Vec = list.into_iter().map(|v| v.into()).collect(); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( DType::Primitive(PType::I32, Nullability::NonNullable), scalars, Nullability::NonNullable, diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs index 3c57bfaac11..baf29d2b592 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs @@ -6,22 +6,14 @@ use vortex_error::VortexResult; use crate::arrays::FixedSizeListArray; use crate::arrays::FixedSizeListVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for FixedSizeListVTable { fn scalar_at(array: &FixedSizeListArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. let list = array.fixed_size_list_elements_at(index)?; - let children_elements: Vec = (0..list.len()) - .map(|i| list.scalar_at(i)) - .collect::>()?; - - debug_assert_eq!(children_elements.len(), array.list_size() as usize); - - Ok(Scalar::fixed_size_list( - list.dtype().clone(), - children_elements, - array.dtype().nullability(), - )) + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/arrays/list/compute/take.rs b/vortex-array/src/arrays/list/compute/take.rs index 1d299516b59..2021bedb414 100644 --- a/vortex-array/src/arrays/list/compute/take.rs +++ b/vortex-array/src/arrays/list/compute/take.rs @@ -231,7 +231,7 @@ mod test { assert!(result.is_valid(0).unwrap()); assert_eq!( result.scalar_at(0).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![0i32.into(), 5.into()], Nullability::Nullable @@ -243,7 +243,7 @@ mod test { assert!(result.is_valid(2).unwrap()); assert_eq!( result.scalar_at(2).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![3i32.into()], Nullability::Nullable @@ -253,7 +253,7 @@ mod test { assert!(result.is_valid(3).unwrap()); assert_eq!( result.scalar_at(3).unwrap(), - Scalar::list(element_dtype, vec![], Nullability::Nullable) + Scalar::list_from_scalars(element_dtype, vec![], Nullability::Nullable) ); } @@ -311,7 +311,7 @@ mod test { assert!(result.is_valid(0).unwrap()); assert_eq!( result.scalar_at(0).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![3i32.into()], Nullability::NonNullable @@ -321,7 +321,7 @@ mod test { assert!(result.is_valid(1).unwrap()); assert_eq!( result.scalar_at(1).unwrap(), - Scalar::list( + Scalar::list_from_scalars( element_dtype.clone(), vec![0i32.into(), 5.into()], Nullability::NonNullable @@ -331,7 +331,7 @@ mod test { assert!(result.is_valid(2).unwrap()); assert_eq!( result.scalar_at(2).unwrap(), - Scalar::list(element_dtype, vec![], Nullability::NonNullable) + Scalar::list_from_scalars(element_dtype, vec![], Nullability::NonNullable) ); } diff --git a/vortex-array/src/arrays/list/test_harness.rs b/vortex-array/src/arrays/list/test_harness.rs index 7ced1eb878f..542d2477a99 100644 --- a/vortex-array/src/arrays/list/test_harness.rs +++ b/vortex-array/src/arrays/list/test_harness.rs @@ -35,7 +35,7 @@ impl ListArray { ); for v in iter { - let elem = Scalar::list( + let elem = Scalar::list_from_scalars( dtype.clone(), v.into_iter().map(|x| x.into()).collect_vec(), dtype.nullability(), @@ -63,7 +63,7 @@ impl ListArray { for v in iter { if let Some(v) = v { - let elem = Scalar::list( + let elem = Scalar::list_from_scalars( dtype.clone(), v.into_iter().map(|x| x.into()).collect_vec(), dtype.nullability(), diff --git a/vortex-array/src/arrays/list/tests.rs b/vortex-array/src/arrays/list/tests.rs index 9d63372ba6b..8ee1395a17a 100644 --- a/vortex-array/src/arrays/list/tests.rs +++ b/vortex-array/src/arrays/list/tests.rs @@ -46,7 +46,7 @@ fn test_simple_list_array() { let list = ListArray::try_new(elements, offsets, validity).unwrap(); assert_eq!( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 2.into()], Nullability::Nullable @@ -54,7 +54,7 @@ fn test_simple_list_array() { list.scalar_at(0).unwrap() ); assert_eq!( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![3.into(), 4.into()], Nullability::Nullable @@ -62,7 +62,7 @@ fn test_simple_list_array() { list.scalar_at(1).unwrap() ); assert_eq!( - Scalar::list(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable), + Scalar::list_from_scalars(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable), list.scalar_at(2).unwrap() ); } @@ -375,7 +375,7 @@ fn test_offset_to_0() { ListBuilder::::with_capacity(Arc::new(I32.into()), Nullability::NonNullable, 10, 5); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -385,7 +385,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![4.into(), 5.into(), 6.into()], Nullability::NonNullable, @@ -395,7 +395,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![7.into(), 8.into(), 9.into()], Nullability::NonNullable, @@ -405,7 +405,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![10.into(), 11.into(), 12.into()], Nullability::NonNullable, @@ -415,7 +415,7 @@ fn test_offset_to_0() { .vortex_expect("operation should succeed in test"); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( Arc::new(I32.into()), vec![13.into(), 14.into(), 15.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/list/vtable/operations.rs b/vortex-array/src/arrays/list/vtable/operations.rs index c326786a510..3459557d97a 100644 --- a/vortex-array/src/arrays/list/vtable/operations.rs +++ b/vortex-array/src/arrays/list/vtable/operations.rs @@ -1,27 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - use vortex_error::VortexResult; use crate::arrays::ListArray; use crate::arrays::ListVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for ListVTable { fn scalar_at(array: &ListArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. - let elems = array.list_elements_at(index)?; - let scalars: Vec = (0..elems.len()) - .map(|i| elems.scalar_at(i)) - .collect::>()?; - - Ok(Scalar::list( - Arc::new(elems.dtype().clone()), - scalars, - array.dtype().nullability(), - )) + let list = array.list_elements_at(index)?; + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/arrays/listview/tests/basic.rs b/vortex-array/src/arrays/listview/tests/basic.rs index 09dce5797bf..385476ba247 100644 --- a/vortex-array/src/arrays/listview/tests/basic.rs +++ b/vortex-array/src/arrays/listview/tests/basic.rs @@ -57,7 +57,7 @@ fn test_basic_listview_comprehensive() { let first_scalar = listview.scalar_at(0).unwrap(); assert_eq!( first_scalar, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/arrays/listview/tests/nullability.rs b/vortex-array/src/arrays/listview/tests/nullability.rs index 30b2a192ef1..464cbc32c7d 100644 --- a/vortex-array/src/arrays/listview/tests/nullability.rs +++ b/vortex-array/src/arrays/listview/tests/nullability.rs @@ -48,7 +48,7 @@ fn test_nullable_listview_comprehensive() { assert!(!first.is_null()); assert_eq!( first, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into()], Nullability::Nullable, @@ -62,7 +62,7 @@ fn test_nullable_listview_comprehensive() { assert!(!third.is_null()); assert_eq!( third, - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![5i32.into(), 6i32.into()], Nullability::Nullable, diff --git a/vortex-array/src/arrays/listview/vtable/operations.rs b/vortex-array/src/arrays/listview/vtable/operations.rs index 60bfc873548..f953b2769da 100644 --- a/vortex-array/src/arrays/listview/vtable/operations.rs +++ b/vortex-array/src/arrays/listview/vtable/operations.rs @@ -1,27 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - use vortex_error::VortexResult; use crate::arrays::ListViewArray; use crate::arrays::ListViewVTable; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::vtable::OperationsVTable; impl OperationsVTable for ListViewVTable { fn scalar_at(array: &ListViewArray, index: usize) -> VortexResult { // By the preconditions we know that the list scalar is not null. let list = array.list_elements_at(index)?; - let children: Vec = (0..list.len()) - .map(|i| list.scalar_at(i)) - .collect::>()?; - - Ok(Scalar::list( - Arc::new(list.dtype().clone()), - children, - array.dtype.nullability(), - )) + let scalar_value = ScalarValue::Array(list); + Scalar::try_new(array.dtype().clone(), Some(scalar_value)) } } diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 0b2d0af547d..af21e3bc99f 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -76,7 +76,7 @@ mod tests { Nullability::Nullable, ); - xs.append_scalar(&Scalar::list( + xs.append_scalar(&Scalar::list_from_scalars( xs.element_dtype().clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::Nullable, diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index 3ce90ad339e..910064de05b 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -108,8 +108,7 @@ impl FixedSizeListBuilder { /// /// [`ListArray`]: crate::arrays::ListArray pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - let Some(elements) = value.elements() else { - // If `elements` is `None`, then the `value` is a null value. + let Some(array) = value.as_array() else { self.append_null(); return Ok(()); }; @@ -123,14 +122,7 @@ impl FixedSizeListBuilder { ); } - for scalar in elements { - // TODO(connor): This is slow, we should be able to append multiple values at once, or - // the list scalar should hold an Array - self.elements_builder.append_scalar(&scalar)?; - } - self.nulls.append_non_null(); - - Ok(()) + self.append_array_as_list(array) } /// Finishes the builder directly into a [`FixedSizeListArray`]. @@ -305,7 +297,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -316,7 +308,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -342,7 +334,10 @@ mod tests { // Append multiple "empty" lists. for _ in 0..100 { builder - .append_value(Scalar::fixed_size_list(dtype.clone(), vec![], NonNullable).as_list()) + .append_value( + Scalar::fixed_size_list_from_scalars(dtype.clone(), vec![], NonNullable) + .as_list(), + ) .unwrap(); } @@ -366,7 +361,8 @@ mod tests { if i % 2 == 0 { builder .append_value( - Scalar::fixed_size_list(dtype.clone(), vec![], Nullable).as_list(), + Scalar::fixed_size_list_from_scalars(dtype.clone(), vec![], Nullable) + .as_list(), ) .unwrap(); } else { @@ -392,7 +388,7 @@ mod tests { for i in 0..5 { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![(i * 2).into(), (i * 2 + 1).into()], NonNullable, @@ -430,8 +426,12 @@ mod tests { builder .append_value( - Scalar::fixed_size_list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable) - .as_list(), + Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + Nullable, + ) + .as_list(), ) .unwrap(); @@ -439,7 +439,12 @@ mod tests { builder .append_value( - Scalar::fixed_size_list(dtype, vec![3i32.into(), 4i32.into()], Nullable).as_list(), + Scalar::fixed_size_list_from_scalars( + dtype, + vec![3i32.into(), 4i32.into()], + Nullable, + ) + .as_list(), ) .unwrap(); @@ -459,7 +464,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![ Scalar::primitive(1i32, Nullable), @@ -474,7 +479,7 @@ mod tests { builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![ Scalar::primitive(4i32, Nullable), @@ -582,7 +587,7 @@ mod tests { // Try to append a list with wrong size. let result = builder.append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![1i32.into(), 2i32.into()], // Only 2 elements, not 3. NonNullable, @@ -688,7 +693,7 @@ mod tests { // Add some initial data. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -713,7 +718,7 @@ mod tests { // Mix of operations. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype, vec![ Scalar::primitive(1i32, Nullable), @@ -758,13 +763,19 @@ mod tests { let mut builder = FixedSizeListBuilder::with_capacity(dtype.clone(), 2, Nullable, 10); // Test appending a valid fixed-size list. - let list_scalar1 = - Scalar::fixed_size_list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); + let list_scalar1 = Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + Nullable, + ); builder.append_scalar(&list_scalar1).unwrap(); // Test appending another list. - let list_scalar2 = - Scalar::fixed_size_list(dtype.clone(), vec![3i32.into(), 4i32.into()], Nullable); + let list_scalar2 = Scalar::fixed_size_list_from_scalars( + dtype.clone(), + vec![3i32.into(), 4i32.into()], + Nullable, + ); builder.append_scalar(&list_scalar2).unwrap(); // Test appending null via builder method (since fixed-size list null handling is special). @@ -814,7 +825,7 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![10i32.into(), 11i32.into(), 12i32.into()], NonNullable, @@ -830,7 +841,7 @@ mod tests { // Interleave with another list scalar. builder .append_value( - Scalar::fixed_size_list( + Scalar::fixed_size_list_from_scalars( dtype.clone(), vec![20i32.into(), 21i32.into(), 22i32.into()], NonNullable, diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index 643402b6555..09e6ed51319 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -6,7 +6,6 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_mask::Mask; @@ -111,29 +110,13 @@ impl ListBuilder { /// Appends a list `value` to the builder. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - match value.elements() { + match value.as_array() { + Some(array) => self.append_array_as_list(array), None => { - if self.dtype.nullability() == NonNullable { - vortex_bail!("Cannot append null value to non-nullable list"); - } self.append_null(); - } - Some(elements) => { - for scalar in elements { - // TODO(connor): This is slow, we should be able to append multiple values at - // once, or the list scalar should hold an Array - self.elements_builder.append_scalar(&scalar)?; - } - - self.nulls.append_non_null(); - self.offsets_builder.append_value( - O::from_usize(self.elements_builder.len()) - .vortex_expect("Failed to convert from usize to O"), - ); + Ok(()) } } - - Ok(()) } /// Finishes the builder directly into a [`ListArray`]. @@ -341,7 +324,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -352,7 +335,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -389,7 +372,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -404,7 +387,7 @@ mod tests { builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![4i32.into(), 5i32.into(), 6i32.into()], NonNullable, @@ -518,11 +501,12 @@ mod tests { let mut builder = ListBuilder::::with_capacity(dtype.clone(), Nullable, 20, 10); // Test appending a valid list. - let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); + let list_scalar1 = + Scalar::list_from_scalars(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable); builder.append_scalar(&list_scalar1).unwrap(); // Test appending another list. - let list_scalar2 = Scalar::list( + let list_scalar2 = Scalar::list_from_scalars( dtype.clone(), vec![3i32.into(), 4i32.into(), 5i32.into()], Nullable, @@ -582,8 +566,12 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable) - .as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![10i32.into(), 11i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index dc3509c55d9..15bead3225a 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -152,39 +152,13 @@ impl ListViewBuilder { /// This method extends the value builder with the provided values and records /// the offset and size of the new list. pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { - let Some(elements) = value.elements() else { - // If `elements` is `None`, then the `value` is a null value. - vortex_ensure!( - self.dtype.is_nullable(), - "Cannot append null value to non-nullable list builder" - ); - self.append_null(); - return Ok(()); - }; - - let curr_offset = self.elements_builder.len(); - let num_elements = elements.len(); - - // We must assert this even in release mode to ensure that the safety comment in - // `finish_into_listview` is correct. - assert!( - ((curr_offset + num_elements) as u64) < O::max_value_as_u64(), - "appending this list would cause an offset overflow" - ); - - for scalar in elements { - self.elements_builder.append_scalar(&scalar)?; + match value.as_array() { + Some(array) => self.append_array_as_list(array), + None => { + self.append_null(); + Ok(()) + } } - self.nulls.append_non_null(); - - self.offsets_builder.append_value( - O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"), - ); - self.sizes_builder.append_value( - S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"), - ); - - Ok(()) } /// Finishes the builder directly into a [`ListViewArray`]. @@ -460,7 +434,7 @@ mod tests { // Append a regular list. builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()], NonNullable, @@ -480,7 +454,8 @@ mod tests { // Append another regular list. builder .append_value( - Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(), + Scalar::list_from_scalars(dtype, vec![4i32.into(), 5i32.into()], NonNullable) + .as_list(), ) .unwrap(); @@ -515,13 +490,18 @@ mod tests { builder .append_value( - Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![1i32.into(), 2i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); builder .append_value( - Scalar::list( + Scalar::list_from_scalars( dtype, vec![3i32.into(), 4i32.into(), 5i32.into()], NonNullable, @@ -553,7 +533,8 @@ mod tests { for i in 0..5 { builder2 .append_value( - Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(), + Scalar::list_from_scalars(dtype2.clone(), vec![(i * 10).into()], NonNullable) + .as_list(), ) .unwrap(); } @@ -586,7 +567,8 @@ mod tests { assert_eq!(builder.len(), 4); // Test append_scalar. - let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable); + let list_scalar = + Scalar::list_from_scalars(dtype, vec![10i32.into(), 20i32.into()], Nullable); builder.append_scalar(&list_scalar).unwrap(); assert_eq!(builder.len(), 5); @@ -623,7 +605,9 @@ mod tests { // Add initial data. builder - .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list()) + .append_value( + Scalar::list_from_scalars(dtype, vec![0i32.into()], NonNullable).as_list(), + ) .unwrap(); // Extend from the ListArray. @@ -663,27 +647,6 @@ mod tests { ); } - #[test] - fn test_error_append_null_to_non_nullable() { - let dtype: Arc = Arc::new(I32.into()); - let mut builder = - ListViewBuilder::::with_capacity(dtype.clone(), NonNullable, 0, 0); - - // Create a null list with nullable type (since Scalar::null requires nullable type). - let null_scalar = Scalar::null(DType::List(dtype, Nullable)); - let null_list = null_scalar.as_list(); - - // This should fail because we're trying to append a null to a non-nullable builder. - let result = builder.append_value(null_list); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("null value to non-nullable") - ); - } - #[test] fn test_append_array_as_list() { use vortex_buffer::buffer; @@ -699,8 +662,12 @@ mod tests { // Interleave with a list scalar. builder .append_value( - Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable) - .as_list(), + Scalar::list_from_scalars( + dtype.clone(), + vec![10i32.into(), 11i32.into()], + NonNullable, + ) + .as_list(), ) .unwrap(); diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index 489c714730c..3ceada928ea 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -287,3 +287,16 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box ArrayRef { + let mut builder = builder_with_capacity(dtype, scalars.len()); + + for scalar in scalars { + builder + .append_scalar(scalar) + .vortex_expect("failed to append scalar to array builder"); + } + + builder.finish() +} diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index ec46ca8879d..0b1560aea1d 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -608,7 +608,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { _ => Scalar::default_value(element_dtype.as_ref()), }) .collect(); - Scalar::list(element_dtype.clone(), elements, *n) + Scalar::list_from_scalars(element_dtype.clone(), elements, *n) } DType::FixedSizeList(element_dtype, size, n) => { // Create fixed-size list scalars. @@ -620,7 +620,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { _ => Scalar::default_value(element_dtype.as_ref()), }) .collect(); - Scalar::fixed_size_list(element_dtype.clone(), elements, *n) + Scalar::fixed_size_list_from_scalars(element_dtype.clone(), elements, *n) } DType::Extension(ext_dtype) => { // Create extension scalars with storage values. diff --git a/vortex-array/src/scalar/arbitrary.rs b/vortex-array/src/scalar/arbitrary.rs index 342d4fa59b0..a047dc217e9 100644 --- a/vortex-array/src/scalar/arbitrary.rs +++ b/vortex-array/src/scalar/arbitrary.rs @@ -14,6 +14,7 @@ use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativeDecimalType; @@ -66,35 +67,39 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { .vortex_expect("unable to construct random `Scalar`_"), DType::Struct(sdt, _) => Scalar::try_new( dtype.clone(), - Some(ScalarValue::List( + Some(ScalarValue::Struct( sdt.fields() .map(|d| random_scalar(u, &d).map(|s| s.into_value())) .collect::>>()?, )), ) .vortex_expect("unable to construct random `Scalar`_"), - DType::List(edt, _) => Scalar::try_new( - dtype.clone(), - Some(ScalarValue::List( - iter::from_fn(|| { - // Generate elements with 1/4 probability. - u.arbitrary() - .unwrap_or(false) - .then(|| random_scalar(u, edt).map(|s| s.into_value())) - }) - .collect::>>()?, - )), - ) - .vortex_expect("unable to construct random `Scalar`_"), - DType::FixedSizeList(edt, size, _) => Scalar::try_new( - dtype.clone(), - Some(ScalarValue::List( - (0..*size) - .map(|_| random_scalar(u, edt).map(|s| s.into_value())) - .collect::>>()?, - )), - ) - .vortex_expect("unable to construct random `Scalar`_"), + DType::List(edt, _) => { + let elements: Vec = iter::from_fn(|| { + // Generate elements with 1/4 probability. + u.arbitrary() + .unwrap_or(false) + .then(|| random_scalar(u, edt)) + }) + .collect::>>()?; + + Scalar::try_new( + dtype.clone(), + Some(ScalarValue::Array(build_array_from_scalars(edt, &elements))), + ) + .vortex_expect("unable to construct random `Scalar`_") + } + DType::FixedSizeList(edt, size, _) => { + let elements: Vec = (0..*size) + .map(|_| random_scalar(u, edt)) + .collect::>>()?; + + Scalar::try_new( + dtype.clone(), + Some(ScalarValue::Array(build_array_from_scalars(edt, &elements))), + ) + .vortex_expect("unable to construct random `Scalar`_") + } DType::Extension(..) => { unreachable!("Can't yet generate arbitrary scalars for ext dtype") } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index bce11e27e0f..e40edcefcb8 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -431,7 +431,7 @@ mod tests { #[should_panic(expected = "list scalar conversion")] fn test_list_scalar_to_arrow_todo() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 82986cd278d..22af0dc0b8f 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -23,10 +23,11 @@ impl Scalar { return Ok(self.clone()); } - // Check for solely nullability casting. - if self.dtype().eq_ignore_nullability(target_dtype) { - // Cast from non-nullable to nullable or vice versa. - // The `try_new` will handle nullability checks. + // TODO(connor): Is this correct? + // Check for solely top-level nullability casting. We compare after adjusting only the + // top-level nullability (not recursive) so that nested types like List/FSL/Struct still + // go through the full cast path when their element or field nullability differs. + if self.dtype().with_nullability(target_dtype.nullability()) == *target_dtype { return Scalar::try_new(target_dtype.clone(), self.value().cloned()); } diff --git a/vortex-array/src/scalar/constructor.rs b/vortex-array/src/scalar/constructor.rs index dd10b4e0e90..7095bd09006 100644 --- a/vortex-array/src/scalar/constructor.rs +++ b/vortex-array/src/scalar/constructor.rs @@ -10,6 +10,8 @@ use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::vortex_panic; +use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativePType; @@ -101,41 +103,73 @@ impl Scalar { .vortex_expect("unable to construct a binary `Scalar`") } + /// Creates a new list scalar using the [`DType`] of the parent array and a child array + /// representing the list as an array. + /// + /// # Panics + /// + /// Panics if the parent dtype is not a `List` or `FixedSixeList` with an `elem_dtype` equal to + /// the `elem_list`'s [`DType`]. + /// + /// Also panics if the parent dtype is a `FixedSizedList` but the `list_size` is not equal to + /// the array length. + pub fn list_array(parent_dtype: DType, elem_list: ArrayRef) -> Self { + match &parent_dtype { + DType::List(elem_dtype, _) => { + assert_eq!(elem_dtype.as_ref(), elem_list.dtype()); + } + DType::FixedSizeList(elem_dtype, list_size, _) => { + assert_eq!(elem_dtype.as_ref(), elem_list.dtype()); + assert_eq!(elem_list.len(), *list_size as usize); + } + _ => vortex_panic!("expected List or FixedSizeList dtype, got {parent_dtype}"), + }; + + let scalar_value = ScalarValue::Array(elem_list); + + // SAFETY: We just checked that the scalar value is valid for this dtype. + unsafe { Scalar::new_unchecked(parent_dtype, Some(scalar_value)) } + } + /// Creates a new list scalar with the given element type and children. /// + /// Callers should prefer to use the faster `Scalar::list_array` constructor when possible. + /// /// # Panics /// /// Panics if any child scalar has a different type than the element type, or if there are too /// many children. - pub fn list( + pub fn list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, ) -> Self { - Self::create_list(element_dtype, children, nullability, ListKind::Variable) + Self::create_list_from_scalars(element_dtype, children, nullability, ListKind::Variable) } /// Creates a new empty list scalar with the given element type. pub fn list_empty(element_dtype: Arc, nullability: Nullability) -> Self { - Self::create_list(element_dtype, vec![], nullability, ListKind::Variable) + Self::create_list_from_scalars(element_dtype, vec![], nullability, ListKind::Variable) } /// Creates a new fixed-size list scalar with the given element type and children. /// + /// Callers should prefer to use the faster `Scalar::list_array` constructor when possible. + /// /// # Panics /// /// Panics if any child scalar has a different type than the element type, or if there are too /// many children. - pub fn fixed_size_list( + pub fn fixed_size_list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, ) -> Self { - Self::create_list(element_dtype, children, nullability, ListKind::FixedSize) + Self::create_list_from_scalars(element_dtype, children, nullability, ListKind::FixedSize) } /// Creates a list [`Scalar`] from an element dtype, children, nullability, and list kind. - fn create_list( + fn create_list_from_scalars( element_dtype: impl Into>, children: Vec, nullability: Nullability, @@ -143,30 +177,30 @@ impl Scalar { ) -> Self { let element_dtype = element_dtype.into(); - let children: Vec> = children - .into_iter() - .map(|child| { - if child.dtype() != &*element_dtype { - vortex_panic!( - "tried to create list of {} with values of type {}", - element_dtype, - child.dtype() - ); - } - child.into_value() - }) - .collect(); + // Validate all children have the correct dtype. + for child in &children { + if child.dtype() != &*element_dtype { + vortex_panic!( + "tried to create list of {} with values of type {}", + element_dtype, + child.dtype() + ); + } + } + let size: u32 = children .len() .try_into() .vortex_expect("tried to create a list that was too large"); + let array = build_array_from_scalars(&element_dtype, &children); + let dtype = match list_kind { ListKind::Variable => DType::List(element_dtype, nullability), ListKind::FixedSize => DType::FixedSizeList(element_dtype, size, nullability), }; - Self::try_new(dtype, Some(ScalarValue::List(children))) + Self::try_new(dtype, Some(ScalarValue::Array(array))) .vortex_expect("unable to construct a list `Scalar`") } diff --git a/vortex-array/src/scalar/convert/into_scalar.rs b/vortex-array/src/scalar/convert/into_scalar.rs index ac804c06dde..5c3e335358c 100644 --- a/vortex-array/src/scalar/convert/into_scalar.rs +++ b/vortex-array/src/scalar/convert/into_scalar.rs @@ -9,6 +9,7 @@ use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::NativeDType; @@ -82,11 +83,9 @@ where Scalar: From, { fn from(vec: Vec) -> Self { - ScalarValue::List( - vec.into_iter() - .map(|elem| Scalar::from(elem).into_value()) - .collect(), - ) + let dtype = T::dtype(); + let scalars: Vec = vec.into_iter().map(Scalar::from).collect(); + ScalarValue::Array(build_array_from_scalars(&dtype, &scalars)) } } diff --git a/vortex-array/src/scalar/downcast.rs b/vortex-array/src/scalar/downcast.rs index 6de05dab4e3..323179197e9 100644 --- a/vortex-array/src/scalar/downcast.rs +++ b/vortex-array/src/scalar/downcast.rs @@ -8,6 +8,7 @@ use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::vortex_panic; +use crate::array::ArrayRef; use crate::scalar::BinaryScalar; use crate::scalar::BoolScalar; use crate::scalar::DecimalScalar; @@ -119,7 +120,8 @@ impl Scalar { /// /// # Panics /// - /// Panics if the scalar does not have a [`List`](crate::dtype::DType::List) or [`FixedSizeList`](crate::dtype::DType::FixedSizeList) type. + /// Panics if the scalar does not have a [`List`](crate::dtype::DType::List) or + /// [`FixedSizeList`](crate::dtype::DType::FixedSizeList) type. pub fn as_list(&self) -> ListScalar<'_> { self.as_list_opt() .vortex_expect("Failed to convert scalar to list") @@ -156,7 +158,7 @@ impl Scalar { } impl ScalarValue { - /// Returns the boolean value, panicking if the value is not a [`Bool`][ScalarValue::Bool]. + /// Returns the boolean value, panicking if the value is not a [`Bool`](ScalarValue::Bool). pub fn as_bool(&self) -> bool { match self { ScalarValue::Bool(b) => *b, @@ -165,7 +167,7 @@ impl ScalarValue { } /// Returns the primitive value, panicking if the value is not a - /// [`Primitive`][ScalarValue::Primitive]. + /// [`Primitive`](ScalarValue::Primitive). pub fn as_primitive(&self) -> &PValue { match self { ScalarValue::Primitive(p) => p, @@ -174,7 +176,7 @@ impl ScalarValue { } /// Returns the decimal value, panicking if the value is not a - /// [`Decimal`][ScalarValue::Decimal]. + /// [`Decimal`](ScalarValue::Decimal). pub fn as_decimal(&self) -> &DecimalValue { match self { ScalarValue::Decimal(d) => d, @@ -182,7 +184,7 @@ impl ScalarValue { } } - /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`][ScalarValue::Utf8]. + /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`](ScalarValue::Utf8). pub fn as_utf8(&self) -> &BufferString { match self { ScalarValue::Utf8(s) => s, @@ -190,7 +192,7 @@ impl ScalarValue { } } - /// Returns the binary value, panicking if the value is not a [`Binary`][ScalarValue::Binary]. + /// Returns the binary value, panicking if the value is not a [`Binary`](ScalarValue::Binary). pub fn as_binary(&self) -> &ByteBuffer { match self { ScalarValue::Binary(b) => b, @@ -198,15 +200,24 @@ impl ScalarValue { } } - /// Returns the list elements, panicking if the value is not a [`List`][ScalarValue::List]. - pub fn as_list(&self) -> &[Option] { + /// Returns the struct field values, panicking if the value is not a + /// [`Struct`](ScalarValue::Struct). + pub fn as_struct_fields(&self) -> &[Option] { match self { - ScalarValue::List(elements) => elements, - _ => vortex_panic!("ScalarValue is not a List"), + ScalarValue::Struct(elements) => elements, + _ => vortex_panic!("ScalarValue is not a Struct"), } } - /// Returns the boolean value, panicking if the value is not a [`Bool`][ScalarValue::Bool]. + /// Returns the array value, panicking if the value is not an [`Array`](ScalarValue::Array). + pub fn as_list(&self) -> &ArrayRef { + match self { + ScalarValue::Array(array) => array, + _ => vortex_panic!("ScalarValue is not an Array"), + } + } + + /// Returns the boolean value, panicking if the value is not a [`Bool`](ScalarValue::Bool). pub fn into_bool(self) -> bool { match self { ScalarValue::Bool(b) => b, @@ -215,7 +226,7 @@ impl ScalarValue { } /// Returns the primitive value, panicking if the value is not a - /// [`Primitive`][ScalarValue::Primitive]. + /// [`Primitive`](ScalarValue::Primitive). pub fn into_primitive(self) -> PValue { match self { ScalarValue::Primitive(p) => p, @@ -224,7 +235,7 @@ impl ScalarValue { } /// Returns the decimal value, panicking if the value is not a - /// [`Decimal`][ScalarValue::Decimal]. + /// [`Decimal`](ScalarValue::Decimal). pub fn into_decimal(self) -> DecimalValue { match self { ScalarValue::Decimal(d) => d, @@ -232,7 +243,7 @@ impl ScalarValue { } } - /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`][ScalarValue::Utf8]. + /// Returns the UTF-8 string value, panicking if the value is not a [`Utf8`](ScalarValue::Utf8). pub fn into_utf8(self) -> BufferString { match self { ScalarValue::Utf8(s) => s, @@ -240,7 +251,7 @@ impl ScalarValue { } } - /// Returns the binary value, panicking if the value is not a [`Binary`][ScalarValue::Binary]. + /// Returns the binary value, panicking if the value is not a [`Binary`](ScalarValue::Binary). pub fn into_binary(self) -> ByteBuffer { match self { ScalarValue::Binary(b) => b, @@ -248,11 +259,20 @@ impl ScalarValue { } } - /// Returns the list elements, panicking if the value is not a [`List`][ScalarValue::List]. - pub fn into_list(self) -> Vec> { + /// Returns the struct field values, panicking if the value is not a + /// [`Struct`](ScalarValue::Struct). + pub fn into_struct_fields(self) -> Vec> { + match self { + ScalarValue::Struct(elements) => elements, + _ => vortex_panic!("ScalarValue is not a Struct"), + } + } + + /// Returns the array value, panicking if the value is not an [`Array`](ScalarValue::Array). + pub fn into_list(self) -> ArrayRef { match self { - ScalarValue::List(elements) => elements, - _ => vortex_panic!("ScalarValue is not a List"), + ScalarValue::Array(array) => array, + _ => vortex_panic!("ScalarValue is not an Array"), } } } diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index b4cdf9a76e6..d4f875449a9 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -3,22 +3,31 @@ //! Protobuf serialization and deserialization for scalars. +use std::sync::Arc; + use num_traits::ToBytes; use num_traits::ToPrimitive; use prost::Message; use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; +use vortex_buffer::ByteBufferMut; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; use vortex_proto::scalar as pb; -use vortex_proto::scalar::ListValue; +use vortex_proto::scalar::ArrayValue; +use vortex_proto::scalar::StructValue; use vortex_proto::scalar::scalar_value::Kind; use vortex_session::VortexSession; +use crate::ArrayContext; +use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; +use crate::dtype::DecimalDType; use crate::dtype::PType; use crate::dtype::half::f16; use crate::dtype::i256; @@ -26,6 +35,9 @@ use crate::scalar::DecimalValue; use crate::scalar::PValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; +use crate::serde::ArrayParts; +use crate::serde::SerializeOptions; +use crate::vtable::ArrayId; //////////////////////////////////////////////////////////////////////////////////////////////////// // Serialize INTO proto. @@ -101,13 +113,41 @@ impl From<&ScalarValue> for pb::ScalarValue { ScalarValue::Binary(v) => pb::ScalarValue { kind: Some(Kind::BytesValue(v.to_vec())), }, - ScalarValue::List(v) => { - let mut values = Vec::with_capacity(v.len()); + ScalarValue::Struct(v) => { + let mut fields = Vec::with_capacity(v.len()); for elem in v.iter() { - values.push(ScalarValue::to_proto(elem.as_ref())); + fields.push(ScalarValue::to_proto(elem.as_ref())); + } + pb::ScalarValue { + kind: Some(Kind::StructValue(StructValue { fields })), } + } + ScalarValue::Array(array) => { + let ctx = ArrayContext::empty(); + + // Note that `ctx` interns an ID during serialization (through the &). + let serialized = array + .to_array() + .serialize(&ctx, &SerializeOptions::default()) + .vortex_expect("somehow unable to serialize value as array"); + + let encoding_ids = ctx.to_ids().into_iter().map(|id| id.to_string()).collect(); + + // Serialize the array buffers back-to-back. + let mut data = ByteBufferMut::empty(); + for buf in serialized { + data.extend_from_slice(buf.as_ref()); + } + let data = data.freeze().to_vec(); + + let array_value = ArrayValue { + row_count: array.len() as u64, + encoding_ids, + data, + }; + pb::ScalarValue { - kind: Some(Kind::ListValue(ListValue { values })), + kind: Some(Kind::ArrayValue(array_value)), } } } @@ -244,6 +284,7 @@ impl ScalarValue { _ => dtype, }; + // TODO(connor): These helper functions should probably be called "from_proto_<>" instead. Ok(Some(match kind { Kind::NullValue(_) => return Ok(None), Kind::BoolValue(v) => bool_from_proto(*v, dtype)?, @@ -254,7 +295,8 @@ impl ScalarValue { Kind::F64Value(v) => f64_from_proto(*v, dtype)?, Kind::StringValue(s) => string_from_proto(s, dtype)?, Kind::BytesValue(b) => bytes_from_proto(b, dtype)?, - Kind::ListValue(v) => list_from_proto(v, dtype, session)?, + Kind::StructValue(v) => struct_from_proto(v, dtype, session)?, + Kind::ArrayValue(a) => array_from_proto(a, dtype, session)?, })) } } @@ -377,75 +419,167 @@ fn string_from_proto(s: &str, dtype: &DType) -> VortexResult { } } -/// Deserialize a [`ScalarValue`] from a protobuf bytes and a `DType`. +/// Deserialize a [`ScalarValue`] from a protobuf bytes and a [`DType`]. +/// +/// Handles all variable-size scalars, including: /// -/// Handles [`Utf8`](ScalarValue::Utf8), [`Binary`](ScalarValue::Binary), and -/// [`Decimal`](ScalarValue::Decimal) dtypes. +/// - `Utf8` -> `ScalarValue::Utf8` +/// - `Binary` -> `ScalarValue::Binary` +/// - `Decimal` -> `ScalarValue::Decimal` (Since decimal has different width representations) fn bytes_from_proto(bytes: &[u8], dtype: &DType) -> VortexResult { match dtype { DType::Utf8(_) => Ok(ScalarValue::Utf8(BufferString::try_from(bytes)?)), DType::Binary(_) => Ok(ScalarValue::Binary(ByteBuffer::copy_from(bytes))), - // TODO(connor): This is incorrect, we need to verify this matches the inner decimal_dtype. - DType::Decimal(..) => Ok(ScalarValue::Decimal(match bytes.len() { - 1 => DecimalValue::I8(bytes[0] as i8), - 2 => DecimalValue::I16(i16::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 4 => DecimalValue::I32(i32::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 8 => DecimalValue::I64(i64::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 16 => DecimalValue::I128(i128::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - 32 => DecimalValue::I256(i256::from_le_bytes( - bytes - .try_into() - .ok() - .vortex_expect("Buffer has invalid number of bytes"), - )), - l => vortex_bail!(Serde: "invalid decimal byte length: {l}"), - })), + DType::Decimal(decimal_dtype, _) => decimal_from_proto(bytes, decimal_dtype), _ => vortex_bail!( - Serde: "expected Utf8, Binary, or Decimal dtype for BytesValue, got {dtype}" + Serde: "expected Utf8, Binary, List, FSL, or Decimal dtype for BytesValue, got {dtype}" ), } } -/// Deserialize a [`ScalarValue::List`] from a protobuf `ListValue`. -fn list_from_proto( - v: &ListValue, +/// Deserialize a [`ScalarValue::Decimal`] from a protobuf bytes. +fn decimal_from_proto(bytes: &[u8], _decimal_dtype: &DecimalDType) -> VortexResult { + let value = match bytes.len() { + 1 => DecimalValue::I8(bytes[0] as i8), + 2 => { + DecimalValue::I16(i16::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 4 => { + DecimalValue::I32(i32::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 8 => { + DecimalValue::I64(i64::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 16 => { + DecimalValue::I128(i128::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + 32 => { + DecimalValue::I256(i256::from_le_bytes(bytes.try_into().ok().vortex_expect( + "we just checked that there was the correct number of bytes", + ))) + } + l => vortex_bail!(Serde: "invalid decimal byte length: {l}"), + }; + + Ok(ScalarValue::Decimal(value)) +} + +/// Deserialize a [`ScalarValue`] from a protobuf [`StructValue`]. +/// +/// For [`DType::Struct`], this produces [`ScalarValue::Struct`] with the deserialized fields. +/// +/// For [`DType::List`] and [`DType::FixedSizeList`], this handles backwards compatibility: old +/// files may have stored list scalars as `StructValue` (formerly `ListValue`). +/// +/// We reconstruct an array from the individual scalar elements and return [`ScalarValue::Array`]. +fn struct_from_proto( + v: &StructValue, dtype: &DType, session: &VortexSession, ) -> VortexResult { - let element_dtype = dtype - .as_list_element_opt() - .ok_or_else(|| vortex_err!(Serde: "expected List dtype for ListValue, got {dtype}"))?; - - let mut values = Vec::with_capacity(v.values.len()); - for elem in v.values.iter() { - values.push(ScalarValue::from_proto( - elem, - element_dtype.as_ref(), - session, - )?); + match dtype { + DType::Struct(struct_fields, _) => { + let mut values = Vec::with_capacity(v.fields.len()); + for (elem, field_dtype) in v.fields.iter().zip(struct_fields.fields()) { + values.push(ScalarValue::from_proto(elem, &field_dtype, session)?); + } + Ok(ScalarValue::Struct(values)) + } + // Handle backcompat list scalars. + DType::List(elem_dtype, _) => { + let scalars = v + .fields + .iter() + .map(|elem| { + let value = ScalarValue::from_proto(elem, elem_dtype.as_ref(), session)?; + Scalar::try_new(elem_dtype.as_ref().clone(), value) + }) + .collect::>>()?; + + Ok(ScalarValue::Array(build_array_from_scalars( + elem_dtype, &scalars, + ))) + } + // Handle backcompat fixed-size list scalars. + DType::FixedSizeList(elem_dtype, size, _) => { + vortex_ensure_eq!( + v.fields.len(), + *size as usize, + "FixedSizeList expected {size} elements, got {}", + v.fields.len() + ); + + let scalars = v + .fields + .iter() + .map(|elem| { + let value = ScalarValue::from_proto(elem, elem_dtype.as_ref(), session)?; + Scalar::try_new(elem_dtype.as_ref().clone(), value) + }) + .collect::>>()?; + + Ok(ScalarValue::Array(build_array_from_scalars( + elem_dtype, &scalars, + ))) + } + _ => vortex_bail!( + Serde: "expected Struct, List, or FixedSizeList dtype for StructValue, got {dtype}" + ), } +} - Ok(ScalarValue::List(values)) +/// The inner function for deserializing a [`ScalarValue::Array`] from a protobuf `ArrayValue`. +fn array_from_proto_inner( + array_value: &ArrayValue, + elem_dtype: &DType, + session: &VortexSession, +) -> VortexResult { + let array_len: usize = array_value.row_count.try_into().map_err( + |_| vortex_err!(Serde: "array row_count {} exceeds usize::MAX", array_value.row_count), + )?; + + // TODO(connor): Do we care about alignment? + // We don't care about alignment here since ArrayParts will handle it. + let parts = ArrayParts::try_from(array_value.data.as_slice())?; + + let encoding_ids = array_value + .encoding_ids + .iter() + .map(|id| ArrayId::new_arc(Arc::from(id.clone()))) + .collect::>(); + let ctx = ArrayContext::new(encoding_ids); + + parts.decode(elem_dtype, array_len, &ctx, session) +} + +/// Deserialize a [`ScalarValue::Array`] from a protobuf `ArrayValue`. +fn array_from_proto( + array_value: &ArrayValue, + dtype: &DType, + session: &VortexSession, +) -> VortexResult { + match dtype { + DType::List(elem_dtype, _) => { + let array = array_from_proto_inner(array_value, elem_dtype, session)?; + Ok(ScalarValue::Array(array)) + } + DType::FixedSizeList(elem_dtype, list_size, _) => { + let array = array_from_proto_inner(array_value, elem_dtype, session)?; + vortex_ensure_eq!(array.len(), *list_size as usize); + Ok(ScalarValue::Array(array)) + } + _ => vortex_bail!( + Serde: "expected Utf8, Binary, List, FSL, or Decimal dtype for BytesValue, got {dtype}" + ), + } } #[cfg(test)] @@ -517,18 +651,61 @@ mod tests { #[test] fn test_list() { - round_trip(Scalar::new( - DType::List( - Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![ - Some(ScalarValue::Primitive(42i32.into())), - Some(ScalarValue::Primitive(43i32.into())), - ])), + let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); + round_trip(Scalar::list_from_scalars( + element_dtype, + vec![ + Scalar::primitive(42i32, Nullability::Nullable), + Scalar::primitive(43i32, Nullability::Nullable), + ], + Nullability::Nullable, )); } + /// Backwards compatibility: old files may have stored list scalars as `StructValue` (tag 9) + /// in the proto. Verify that deserializing such a payload produces `ScalarValue::Array`. + #[test] + fn test_backcompat_struct_value_with_list_dtype() { + let dtype = DType::List( + Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), + Nullability::Nullable, + ); + + // Build a StructValue proto as old writers would have produced. + let pb_struct = StructValue { + fields: vec![ + ScalarValue::to_proto(Some(&ScalarValue::Primitive(42i32.into()))), + ScalarValue::to_proto(Some(&ScalarValue::Primitive(43i32.into()))), + ], + }; + let pb_scalar_value = pb::ScalarValue { + kind: Some(Kind::StructValue(pb_struct)), + }; + + let deserialized = ScalarValue::from_proto(&pb_scalar_value, &dtype, &session()) + .unwrap() + .expect("should not be null"); + + // The result should be an Array, not a Struct. + assert!( + matches!(deserialized, ScalarValue::Array(_)), + "expected ScalarValue::Array for backcompat list deserialization, got {deserialized:?}" + ); + + // Verify the values are correct. + let scalar = Scalar::try_new(dtype, Some(deserialized)).unwrap(); + let list = scalar.as_list(); + assert_eq!(list.len(), 2); + assert_eq!( + list.element(0).unwrap().as_primitive().typed_value::(), + Some(42) + ); + assert_eq!( + list.element(1).unwrap().as_primitive().typed_value::(), + Some(43) + ); + } + #[test] fn test_f16() { round_trip(Scalar::primitive( diff --git a/vortex-array/src/scalar/scalar_impl.rs b/vortex-array/src/scalar/scalar_impl.rs index c7a0e9b7999..fea04f67725 100644 --- a/vortex-array/src/scalar/scalar_impl.rs +++ b/vortex-array/src/scalar/scalar_impl.rs @@ -186,9 +186,11 @@ impl Scalar { DType::Decimal(..) => value.as_decimal().is_zero(), DType::Utf8(_) => value.as_utf8().is_empty(), DType::Binary(_) => value.as_binary().is_empty(), - DType::List(..) => value.as_list().is_empty(), - DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize, - DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(), + DType::List(..) => self.as_list().is_empty(), + DType::FixedSizeList(_, list_size, _) => self.as_list().len() == *list_size as usize, + DType::Struct(struct_fields, _) => { + value.as_struct_fields().len() == struct_fields.nfields() + } DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?, }; diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index 71bf4fcfc77..f20b57c5261 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -4,36 +4,61 @@ //! Core [`ScalarValue`] type definition. use std::cmp::Ordering; -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; use itertools::Itertools; use vortex_buffer::BufferString; use vortex_buffer::ByteBuffer; +use vortex_error::VortexExpect; use vortex_error::vortex_panic; +use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::scalar::DecimalValue; use crate::scalar::PValue; +use crate::scalar::Scalar; -/// The value stored in a [`Scalar`][crate::scalar::Scalar]. +/// The value stored in a [`Scalar`](crate::scalar::Scalar). /// /// This enum represents the possible non-null values that can be stored in a scalar. When the /// scalar is null, the value is represented as `None` in the `Option` field. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// +/// We can think of this type loosely (heavy emphasis on "loosely") as the serialization / physical +/// type of scalars. +#[derive(Debug, Clone)] pub enum ScalarValue { /// A boolean value. Bool(bool), + /// A primitive numeric value. Primitive(PValue), + /// A decimal value. Decimal(DecimalValue), + /// A UTF-8 encoded string value. Utf8(BufferString), + /// A binary (byte array) value. Binary(ByteBuffer), + /// A list of potentially null scalar values. - List(Vec>), + /// + /// Previously, we used this to represent all of struct, list, and fixed-size list scalars, but + /// with the recent addition of the `ScalarValue::Array` variant below, this only stores struct + /// scalars (and any other list scalars written to files in the past for backcompat). + Struct(Vec>), + + /// An [`ArrayRef`] representing a single list scalar of a `List` or `FixedSizeList` array. + /// + /// We serialize this by using `Array::serialize()` (see vortex-array/src/serde.rs) into + /// protobuf bytes. Note that because we require passing the length of the array to deserialize + /// with [`ArrayParts::decode()`](crate::serde::ArrayParts::decode), we store the array length + /// in the first 8 bytes. + Array(ArrayRef), } impl ScalarValue { @@ -46,17 +71,22 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::List(vec![]), + DType::List(edt, _) => Self::Array(build_array_from_scalars(edt, &[])), DType::FixedSizeList(edt, size, _) => { - let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect(); - Self::List(elements) + let elements: Vec = (0..*size) + .map(|_| { + Scalar::try_new(edt.as_ref().clone(), Some(Self::zero_value(edt))) + .vortex_expect("failed to create zero scalar") + }) + .collect(); + Self::Array(build_array_from_scalars(edt, &elements)) } DType::Struct(fields, _) => { let field_values = fields .fields() .map(|f| Some(Self::zero_value(&f))) .collect(); - Self::List(field_values) + Self::Struct(field_values) } DType::Extension(ext_dtype) => { // Since we have no way to define a "zero" extension value (since we have no idea @@ -85,14 +115,19 @@ impl ScalarValue { DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)), DType::Utf8(_) => Self::Utf8(BufferString::empty()), DType::Binary(_) => Self::Binary(ByteBuffer::empty()), - DType::List(..) => Self::List(vec![]), + DType::List(edt, _) => Self::Array(build_array_from_scalars(edt, &[])), DType::FixedSizeList(edt, size, _) => { - let elements = (0..*size).map(|_| Self::default_value(edt)).collect(); - Self::List(elements) + let elements: Vec = (0..*size) + .map(|_| { + Scalar::try_new(edt.as_ref().clone(), Self::default_value(edt)) + .vortex_expect("failed to create default scalar") + }) + .collect(); + Self::Array(build_array_from_scalars(edt, &elements)) } DType::Struct(fields, _) => { let field_values = fields.fields().map(|f| Self::default_value(&f)).collect(); - Self::List(field_values) + Self::Struct(field_values) } DType::Extension(ext_dtype) => { // Since we have no way to define a "default" extension value (since we have no idea @@ -104,23 +139,8 @@ impl ScalarValue { } } -impl PartialOrd for ScalarValue { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b), - (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b), - (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b), - (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), - (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), - (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b), - // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b), - _ => None, - } - } -} - -impl Display for ScalarValue { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +impl fmt::Display for ScalarValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ScalarValue::Bool(b) => write!(f, "{b}"), ScalarValue::Primitive(p) => write!(f, "{p}"), @@ -150,7 +170,7 @@ impl Display for ScalarValue { write!(f, "{}", to_hex(b)) } } - ScalarValue::List(elements) => { + ScalarValue::Struct(elements) => { write!(f, "[")?; for (i, element) in elements.iter().enumerate() { if i > 0 { @@ -163,6 +183,98 @@ impl Display for ScalarValue { } write!(f, "]") } + ScalarValue::Array(array) => { + write!(f, "[")?; + for i in 0..array.len() { + if i > 0 { + write!(f, ", ")?; + } + match array.scalar_at(i) { + Ok(scalar) => write!(f, "{scalar}")?, + Err(_) => write!(f, "")?, + } + } + write!(f, "]") + } + } + } +} + +impl PartialEq for ScalarValue { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a == b, + (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a == b, + (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a == b, + (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a == b, + (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a == b, + (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a == b, + (ScalarValue::Array(a), ScalarValue::Array(b)) => { + // Compare element-by-element. + a.len() == b.len() + && a.dtype().eq_ignore_nullability(b.dtype()) + && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) + } + _ => false, + } + } +} + +impl Eq for ScalarValue {} + +impl PartialOrd for ScalarValue { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b), + (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b), + (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b), + (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), + (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), + (ScalarValue::Struct(a), ScalarValue::Struct(b)) => a.partial_cmp(b), + // Two arrays: lexicographic element-by-element comparison. + (ScalarValue::Array(a), ScalarValue::Array(b)) => { + let min_len = a.len().min(b.len()); + + // Compute the lexicographic ordering. + for i in 0..min_len { + let a_scalar = a.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + let b_scalar = b.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + + match a_scalar.partial_cmp(&b_scalar)? { + Ordering::Equal => continue, + ord => return Some(ord), + } + } + + a.len().partial_cmp(&b.len()) + } + _ => None, + } + } +} + +impl Hash for ScalarValue { + fn hash(&self, state: &mut H) { + match self { + ScalarValue::Bool(b) => b.hash(state), + ScalarValue::Primitive(p) => p.hash(state), + ScalarValue::Decimal(d) => d.hash(state), + ScalarValue::Utf8(s) => s.hash(state), + ScalarValue::Binary(b) => b.hash(state), + ScalarValue::Struct(l) => l.hash(state), + ScalarValue::Array(array) => { + array.len().hash(state); + for i in 0..array.len() { + let scalar = array.scalar_at(i).vortex_expect( + "something happened with `scalar_at` in `Hash` of `ScalarValue::Array`", + ); + scalar.hash(state); + } + } } } } diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 72b44a5ed55..bccff22fd27 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -158,7 +158,7 @@ mod tests { Some(ScalarValue::Primitive(PValue::F32(f32_value))), ]; - let scalar = Scalar::new(struct_dtype, Some(ScalarValue::List(field_values))); + let scalar = Scalar::new(struct_dtype, Some(ScalarValue::Struct(field_values))); let struct_scalar = scalar.as_struct(); let fields: Vec<_> = (0..3) @@ -201,22 +201,17 @@ mod tests { let f16_value1 = f16::from_f32(1.0); let f16_value2 = f16::from_f32(2.0); - let list_dtype = DType::List( - Arc::new(DType::Primitive(PType::F16, Nullability::NonNullable)), + let element_dtype = Arc::new(DType::Primitive(PType::F16, Nullability::NonNullable)); + + let scalar = Scalar::list_from_scalars( + element_dtype, + vec![ + Scalar::primitive(f16_value1, Nullability::NonNullable), + Scalar::primitive(f16_value2, Nullability::NonNullable), + ], Nullability::NonNullable, ); - let elements = vec![ - Some(ScalarValue::Primitive(PValue::U64( - f16_value1.to_bits() as u64 - ))), - Some(ScalarValue::Primitive(PValue::U64( - f16_value2.to_bits() as u64 - ))), - ]; - - let scalar = Scalar::new(list_dtype, Some(ScalarValue::List(elements))); - let list_scalar = scalar.as_list(); let elements = list_scalar.elements().unwrap(); @@ -371,7 +366,7 @@ mod tests { let scalar = Scalar::new( DType::Extension(ext_dtype.erased()), - Some(ScalarValue::List(field_values)), + Some(ScalarValue::Struct(field_values)), ); // Verify the struct field was coerced diff --git a/vortex-array/src/scalar/tests/nested.rs b/vortex-array/src/scalar/tests/nested.rs index c4b28418c36..3db4071571f 100644 --- a/vortex-array/src/scalar/tests/nested.rs +++ b/vortex-array/src/scalar/tests/nested.rs @@ -10,9 +10,7 @@ mod tests { use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; - use crate::scalar::PValue; use crate::scalar::Scalar; - use crate::scalar::ScalarValue; #[test] fn test_fixed_size_list_of_fixed_size_list() { @@ -25,7 +23,7 @@ mod tests { )); // Create inner FixedSizeLists. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -35,7 +33,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(4i32, Nullability::NonNullable), @@ -46,7 +44,7 @@ mod tests { ); // Create outer FixedSizeList. - let outer_list = Scalar::fixed_size_list( + let outer_list = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -91,7 +89,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -100,7 +98,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(30i32, Nullability::NonNullable), @@ -110,7 +108,7 @@ mod tests { Nullability::NonNullable, ); - let outer_fixed_list = Scalar::fixed_size_list( + let outer_fixed_list = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -138,7 +136,7 @@ mod tests { Nullability::NonNullable, )); - let fixed_list1 = Scalar::fixed_size_list( + let fixed_list1 = Scalar::fixed_size_list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -148,7 +146,7 @@ mod tests { Nullability::NonNullable, ); - let fixed_list2 = Scalar::fixed_size_list( + let fixed_list2 = Scalar::fixed_size_list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(4i32, Nullability::NonNullable), @@ -158,7 +156,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![fixed_list1, fixed_list2], Nullability::NonNullable, @@ -202,7 +200,7 @@ mod tests { ], ); - let fixed_list_of_structs = Scalar::fixed_size_list( + let fixed_list_of_structs = Scalar::fixed_size_list_from_scalars( Arc::new(struct_dtype), vec![struct1, struct2], Nullability::NonNullable, @@ -255,7 +253,7 @@ mod tests { Nullability::NonNullable, ); - let fixed_list_field = Scalar::fixed_size_list( + let fixed_list_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(10i64, Nullability::NonNullable), @@ -306,15 +304,13 @@ mod tests { #[test] fn test_list_cast_element_types() { // Test casting list elements between different primitive types. - let list = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - Some(ScalarValue::Primitive(PValue::U16(100))), - ])), + let list = Scalar::list_from_scalars( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::primitive(100u16, Nullability::Nullable), + ], + Nullability::Nullable, ); // Cast U16 -> U32. @@ -345,16 +341,14 @@ mod tests { #[test] fn test_list_cast_element_overflow() { // Test that casting U16 values too large for U8 fails. - let list_with_large_values = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![ - Some(ScalarValue::Primitive(PValue::U16(100))), - Some(ScalarValue::Primitive(PValue::U16(256))), // Too large for U8 - Some(ScalarValue::Primitive(PValue::U16(1000))), // Too large for U8 - ])), + let list_with_large_values = Scalar::list_from_scalars( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(100u16, Nullability::Nullable), + Scalar::primitive(256u16, Nullability::Nullable), // Too large for U8. + Scalar::primitive(1000u16, Nullability::Nullable), // Too large for U8. + ], + Nullability::Nullable, ); let target_u8 = DType::List( @@ -375,7 +369,7 @@ mod tests { #[test] fn test_list_cast_nested_lists() { // Create a list of lists. - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -383,7 +377,7 @@ mod tests { ], Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -391,7 +385,7 @@ mod tests { ], Nullability::NonNullable, ); - let nested_list = Scalar::list( + let nested_list = Scalar::list_from_scalars( Arc::from(DType::List( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), Nullability::NonNullable, @@ -415,7 +409,7 @@ mod tests { #[test] fn test_list_cast_empty_list() { // Test casting empty list. - let empty_list = Scalar::list( + let empty_list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -443,7 +437,7 @@ mod tests { #[test] fn test_list_cast_string_elements() { // Create a list of strings. - let string_list = Scalar::list( + let string_list = Scalar::list_from_scalars( Arc::from(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("hello", Nullability::NonNullable), @@ -485,7 +479,7 @@ mod tests { Scalar::primitive(20i64, Nullability::NonNullable), ], ); - let struct_list = Scalar::list( + let struct_list = Scalar::list_from_scalars( Arc::from(struct_dtype), vec![struct1, struct2], Nullability::NonNullable, @@ -507,7 +501,7 @@ mod tests { #[test] fn test_list_cast_incompatible_element_types() { // Create a list of integers. - let int_list = Scalar::list( + let int_list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![Scalar::primitive(1i32, Nullability::NonNullable)], Nullability::NonNullable, @@ -524,7 +518,7 @@ mod tests { #[test] fn test_list_to_fixed_size_list_cast() { // Create a list with 3 elements. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -548,7 +542,7 @@ mod tests { #[test] fn test_fixed_size_list_to_list_cast() { // Create a FixedSizeList with 2 elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -570,7 +564,7 @@ mod tests { #[test] fn test_fixed_size_list_to_fixed_size_list_cast() { // Create a FixedSizeList[4] with I32 elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -594,7 +588,7 @@ mod tests { #[test] fn test_fixed_size_list_size_mismatch_error() { // Create a list with 2 elements. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::from(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -645,7 +639,7 @@ mod tests { )); // Create inner FixedSizeList for struct field. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(100i64, Nullability::NonNullable), @@ -654,7 +648,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), vec![ Scalar::primitive(300i64, Nullability::NonNullable), @@ -681,14 +675,14 @@ mod tests { ); // Create middle FixedSizeList[2] of structs. - let middle_list = Scalar::fixed_size_list( + let middle_list = Scalar::fixed_size_list_from_scalars( struct_dtype, vec![struct1, struct2], Nullability::NonNullable, ); // Create outer List. - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( middle_fixed_list_dtype, vec![middle_list.clone(), middle_list], Nullability::NonNullable, @@ -737,7 +731,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -746,7 +740,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -755,7 +749,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -804,7 +798,7 @@ mod tests { )); // Create innermost FixedSizeLists. - let inner_fixed1 = Scalar::fixed_size_list( + let inner_fixed1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -813,7 +807,7 @@ mod tests { Nullability::NonNullable, ); - let inner_fixed2 = Scalar::fixed_size_list( + let inner_fixed2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -823,20 +817,20 @@ mod tests { ); // Create middle Lists. - let middle_list1 = Scalar::list( + let middle_list1 = Scalar::list_from_scalars( innermost_dtype.clone(), vec![inner_fixed1.clone()], Nullability::NonNullable, ); - let middle_list2 = Scalar::list( + let middle_list2 = Scalar::list_from_scalars( innermost_dtype, vec![inner_fixed2, inner_fixed1], Nullability::NonNullable, ); // Create outer FixedSizeList. - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( middle_dtype, vec![middle_list1, middle_list2], Nullability::NonNullable, @@ -861,7 +855,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -870,7 +864,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::fixed_size_list( + let inner_list2 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -879,7 +873,7 @@ mod tests { Nullability::NonNullable, ); - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( inner_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, @@ -899,7 +893,8 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let fixed_list = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable); + let fixed_list = + Scalar::fixed_size_list_from_scalars(element_dtype, children, Nullability::NonNullable); assert!(matches!(fixed_list.dtype(), DType::FixedSizeList(_, 3, _))); @@ -926,8 +921,11 @@ mod tests { fn test_fixed_size_list_size_zero() { // Test FixedSizeList[0] behavior. let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let empty_fixed_list = - Scalar::fixed_size_list(element_dtype.clone(), vec![], Nullability::NonNullable); + let empty_fixed_list = Scalar::fixed_size_list_from_scalars( + element_dtype.clone(), + vec![], + Nullability::NonNullable, + ); assert!(matches!( empty_fixed_list.dtype(), @@ -948,7 +946,7 @@ mod tests { #[test] fn test_fixed_size_list_cast_to_list() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -984,7 +982,7 @@ mod tests { #[test] fn test_list_cast_to_fixed_size_list() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list = Scalar::list( + let list = Scalar::list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(10i32, Nullability::NonNullable), @@ -1021,7 +1019,7 @@ mod tests { #[test] fn test_fixed_size_list_size_validation() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let list = Scalar::list( + let list = Scalar::list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1051,7 +1049,7 @@ mod tests { #[test] fn test_fixed_size_list_display() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(100i32, Nullability::NonNullable), @@ -1071,7 +1069,7 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); // Create two equal fixed size lists. - let list1 = Scalar::fixed_size_list( + let list1 = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1080,7 +1078,7 @@ mod tests { Nullability::NonNullable, ); - let list2 = Scalar::fixed_size_list( + let list2 = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1092,7 +1090,7 @@ mod tests { assert_eq!(list1, list2); // Create a different list. - let list3 = Scalar::fixed_size_list( + let list3 = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1112,7 +1110,7 @@ mod tests { fn test_fixed_size_list_single_element() { // Test FixedSizeList[1]. let element_dtype = Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)); - let single_element = Scalar::fixed_size_list( + let single_element = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![Scalar::primitive(42i64, Nullability::NonNullable)], Nullability::NonNullable, @@ -1139,7 +1137,7 @@ mod tests { #[test] fn test_fixed_size_list_with_strings() { let element_dtype = Arc::new(DType::Utf8(Nullability::NonNullable)); - let string_list = Scalar::fixed_size_list( + let string_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::utf8("hello", Nullability::NonNullable), @@ -1171,7 +1169,7 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); // FixedSizeList[3] of i32 = 3 * 4 bytes = 12 bytes. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1183,11 +1181,12 @@ mod tests { assert_eq!(fixed_list.approx_nbytes(), 12); // Empty FixedSizeList[0] = 0 bytes. - let empty_list = Scalar::fixed_size_list(element_dtype, vec![], Nullability::NonNullable); + let empty_list = + Scalar::fixed_size_list_from_scalars(element_dtype, vec![], Nullability::NonNullable); assert_eq!(empty_list.approx_nbytes(), 0); // FixedSizeList with strings. - let string_list = Scalar::fixed_size_list( + let string_list = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("ab", Nullability::NonNullable), // 2 bytes @@ -1201,7 +1200,7 @@ mod tests { #[test] fn test_fixed_size_list_casting_with_nullability() { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -1239,7 +1238,7 @@ mod tests { #[test] fn test_list_basic_operations() { // Create a simple list. - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1257,7 +1256,7 @@ mod tests { #[test] fn test_list_of_lists() { // Create nested lists without FixedSizeList. - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("a", Nullability::NonNullable), @@ -1266,7 +1265,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("c", Nullability::NonNullable), @@ -1276,7 +1275,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::Utf8(Nullability::NonNullable)), Nullability::NonNullable, @@ -1387,7 +1386,7 @@ mod tests { ], ); - let list_of_structs = Scalar::list( + let list_of_structs = Scalar::list_from_scalars( Arc::new(struct_dtype), vec![struct1, struct2, struct3], Nullability::NonNullable, @@ -1427,7 +1426,7 @@ mod tests { Nullability::NonNullable, ); - let tags_list = Scalar::list( + let tags_list = Scalar::list_from_scalars( Arc::new(DType::Utf8(Nullability::NonNullable)), vec![ Scalar::utf8("rust", Nullability::NonNullable), @@ -1474,7 +1473,7 @@ mod tests { #[test] fn test_deeply_nested_lists_only() { // Create a 3-level nested list structure without any FixedSizeList. - let level3 = Scalar::list( + let level3 = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -1483,7 +1482,7 @@ mod tests { Nullability::NonNullable, ); - let level2 = Scalar::list( + let level2 = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), Nullability::NonNullable, @@ -1492,7 +1491,7 @@ mod tests { Nullability::NonNullable, ); - let level1 = Scalar::list( + let level1 = Scalar::list_from_scalars( Arc::new(DType::List( Arc::new(DType::List( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), diff --git a/vortex-array/src/scalar/tests/nullability.rs b/vortex-array/src/scalar/tests/nullability.rs index d70796d20ce..8cbcac0bb66 100644 --- a/vortex-array/src/scalar/tests/nullability.rs +++ b/vortex-array/src/scalar/tests/nullability.rs @@ -15,9 +15,7 @@ mod tests { use crate::extension::datetime::Date; use crate::extension::datetime::TimeUnit; use crate::extension::datetime::Timestamp; - use crate::scalar::PValue; use crate::scalar::Scalar; - use crate::scalar::ScalarValue; #[rstest] fn null_can_cast_to_anything_nullable( @@ -49,14 +47,10 @@ mod tests { #[test] fn test_list_cast_nullability_changes() { - let list = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![Some(ScalarValue::Primitive( - PValue::U16(6), - ))])), + let list = Scalar::list_from_scalars( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![Scalar::primitive(6u16, Nullability::Nullable)], + Nullability::Nullable, ); // Change element nullability from Nullable to NonNullable. @@ -92,19 +86,17 @@ mod tests { #[test] fn test_list_cast_with_null_elements() { - let list_with_null = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - None, - Some(ScalarValue::Primitive(PValue::U16(10))), - ])), + let list_with_null = Scalar::list_from_scalars( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::null(DType::Primitive(PType::U16, Nullability::Nullable)), + Scalar::primitive(10u16, Nullability::Nullable), + ], + Nullability::Nullable, ); - // Cast to different element type with nullable elements - should succeed. + // Cast to different element type with nullable elements — should succeed. let target_u8_nullable = DType::List( Arc::from(DType::Primitive(PType::U8, Nullability::Nullable)), Nullability::Nullable, @@ -154,7 +146,7 @@ mod tests { assert_eq!(struct_with_null.approx_nbytes(), 4 + 8); // Test list with null elements - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -195,15 +187,13 @@ mod tests { #[test] fn test_list_cast_null_to_nonnull_error() { - let list_with_null = Scalar::new( - DType::List( - Arc::from(DType::Primitive(PType::U16, Nullability::Nullable)), - Nullability::Nullable, - ), - Some(ScalarValue::List(vec![ - Some(ScalarValue::Primitive(PValue::U16(6))), - None, - ])), + let list_with_null = Scalar::list_from_scalars( + Arc::new(DType::Primitive(PType::U16, Nullability::Nullable)), + vec![ + Scalar::primitive(6u16, Nullability::Nullable), + Scalar::null(DType::Primitive(PType::U16, Nullability::Nullable)), + ], + Nullability::Nullable, ); // Casting to non-nullable element type should fail. @@ -218,7 +208,7 @@ mod tests { fn test_fixed_size_list_null_elements() { // Create FixedSizeList with null elements. let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)); - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( element_dtype, vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -259,7 +249,7 @@ mod tests { )); // First inner list: non-null container with one null element. - let inner_list1 = Scalar::fixed_size_list( + let inner_list1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -276,7 +266,7 @@ mod tests { )); // Third inner list: non-null container with non-null elements. - let inner_list3 = Scalar::fixed_size_list( + let inner_list3 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(3i32, Nullability::Nullable), @@ -285,7 +275,7 @@ mod tests { Nullability::Nullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_dtype, vec![inner_list1, inner_list2, inner_list3], Nullability::NonNullable, @@ -351,7 +341,7 @@ mod tests { assert!(!struct_view.field("name").unwrap().is_null()); // Create struct with non-null FixedSizeList containing null elements. - let fixed_list_with_nulls = Scalar::fixed_size_list( + let fixed_list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I64, Nullability::Nullable)), vec![ Scalar::primitive(100i64, Nullability::Nullable), @@ -380,7 +370,7 @@ mod tests { fn test_list_partial_null_elements() { // Test List with mixture of null and non-null elements. let element_dtype = Arc::new(DType::Primitive(PType::F32, Nullability::Nullable)); - let list_with_nulls = Scalar::list( + let list_with_nulls = Scalar::list_from_scalars( element_dtype, vec![ Scalar::primitive(1.5f32, Nullability::Nullable), @@ -409,7 +399,7 @@ mod tests { // Test that casting null elements to non-nullable fails. // FixedSizeList with null elements can't cast to non-nullable elements. - let fixed_list_with_nulls = Scalar::fixed_size_list( + let fixed_list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -447,7 +437,7 @@ mod tests { #[test] fn test_nullable_to_nonnullable_valid_cast() { // Test that casting nullable types to non-nullable succeeds when no nulls present. - let fixed_list_no_nulls = Scalar::fixed_size_list( + let fixed_list_no_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -486,7 +476,7 @@ mod tests { assert!(null_list.is_null()); // Case 2: Non-null FixedSizeList with all null elements. - let list_all_nulls = Scalar::fixed_size_list( + let list_all_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)), @@ -501,7 +491,7 @@ mod tests { assert!(list_all_nulls.as_list().element(2).unwrap().is_null()); // Case 3: Non-null FixedSizeList with some null elements. - let list_some_nulls = Scalar::fixed_size_list( + let list_some_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -592,7 +582,7 @@ mod tests { let middle_dtype = Arc::new(DType::List(innermost_dtype.clone(), Nullability::Nullable)); // Create innermost FixedSizeLists with different null patterns. - let inner1 = Scalar::fixed_size_list( + let inner1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), @@ -607,7 +597,7 @@ mod tests { Nullability::Nullable, )); - let _inner3 = Scalar::fixed_size_list( + let _inner3 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(3i32, Nullability::Nullable), @@ -617,7 +607,7 @@ mod tests { ); // Create middle Lists. - let middle1 = Scalar::list( + let middle1 = Scalar::list_from_scalars( innermost_dtype.clone(), vec![inner1, inner2], Nullability::Nullable, @@ -626,7 +616,7 @@ mod tests { let middle2 = Scalar::null(DType::List(innermost_dtype, Nullability::Nullable)); // Create outer FixedSizeList. - let outer = Scalar::fixed_size_list( + let outer = Scalar::fixed_size_list_from_scalars( middle_dtype, vec![middle1, middle2], Nullability::NonNullable, @@ -674,7 +664,7 @@ mod tests { assert_ne!(null_list1, null_list3); // Non-null list with null elements vs null list. - let list_with_nulls = Scalar::fixed_size_list( + let list_with_nulls = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)), @@ -723,7 +713,7 @@ mod tests { ); // Create struct with mixed null patterns. - let fixed_list_field = Scalar::fixed_size_list( + let fixed_list_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(10i32, Nullability::Nullable), @@ -737,7 +727,7 @@ mod tests { Nullability::Nullable, )); - let inner_fixed1 = Scalar::fixed_size_list( + let inner_fixed1 = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::U8, Nullability::Nullable)), vec![ Scalar::primitive(1u8, Nullability::Nullable), @@ -752,7 +742,7 @@ mod tests { Nullability::Nullable, )); - let nested_fixed_field = Scalar::fixed_size_list( + let nested_fixed_field = Scalar::fixed_size_list_from_scalars( Arc::new(DType::FixedSizeList( Arc::new(DType::Primitive(PType::U8, Nullability::Nullable)), 2, @@ -786,7 +776,7 @@ mod tests { #[test] fn test_casting_preserves_null_positions() { // Ensure that casting preserves the positions of null elements. - let fixed_list = Scalar::fixed_size_list( + let fixed_list = Scalar::fixed_size_list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), diff --git a/vortex-array/src/scalar/tests/primitives.rs b/vortex-array/src/scalar/tests/primitives.rs index d2e32ce6ef0..1eac66ffee0 100644 --- a/vortex-array/src/scalar/tests/primitives.rs +++ b/vortex-array/src/scalar/tests/primitives.rs @@ -116,7 +116,7 @@ mod tests { assert_eq!(struct_scalar.approx_nbytes(), 4 + 8); // i32 + i64 // Test list scalar - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -245,7 +245,7 @@ mod tests { assert_eq!(struct_with_null.approx_nbytes(), 4 + 8); // Test list with null elements - let list_with_null = Scalar::list( + let list_with_null = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::Nullable)), vec![ Scalar::primitive(1i32, Nullability::Nullable), diff --git a/vortex-array/src/scalar/tests/round_trip.rs b/vortex-array/src/scalar/tests/round_trip.rs index c904a27f2ae..6eb0ca78389 100644 --- a/vortex-array/src/scalar/tests/round_trip.rs +++ b/vortex-array/src/scalar/tests/round_trip.rs @@ -156,7 +156,8 @@ mod tests { Scalar::primitive(2i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children.clone(), Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children.clone(), Nullability::NonNullable); // Extract as ListScalar let list_specialized = list_scalar.as_list(); @@ -209,7 +210,7 @@ mod tests { #[test] fn test_protobuf_edge_cases() { // Test empty list - let empty_list = Scalar::list( + let empty_list = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![], Nullability::NonNullable, @@ -222,7 +223,7 @@ mod tests { let inner_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); let outer_dtype = Arc::new(DType::List(inner_dtype.clone(), Nullability::NonNullable)); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_dtype, vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -231,7 +232,8 @@ mod tests { Nullability::NonNullable, ); - let nested_list = Scalar::list(outer_dtype, vec![inner_list1], Nullability::NonNullable); + let nested_list = + Scalar::list_from_scalars(outer_dtype, vec![inner_list1], Nullability::NonNullable); let pb_nested = pb::Scalar::from(&nested_list); let round_tripped_nested = Scalar::from_proto(&pb_nested, &SESSION).unwrap(); diff --git a/vortex-array/src/scalar/typed_view/list.rs b/vortex-array/src/scalar/typed_view/list.rs index 6c4206f2a0e..d9cdda751ce 100644 --- a/vortex-array/src/scalar/typed_view/list.rs +++ b/vortex-array/src/scalar/typed_view/list.rs @@ -3,9 +3,11 @@ //! [`ListScalar`] typed view implementation. +use std::cmp::Ordering; use std::fmt::Display; use std::fmt::Formatter; use std::hash::Hash; +use std::hash::Hasher; use std::sync::Arc; use itertools::Itertools; @@ -15,6 +17,8 @@ use vortex_error::vortex_bail; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use crate::ArrayRef; +use crate::builders::build_array_from_scalars; use crate::dtype::DType; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -28,71 +32,20 @@ use crate::scalar::ScalarValue; /// more elements of the same type, or be null. If the `dtype` is a [`FixedSizeList`], then the /// number of `elements` is equal to the `size` field of the [`FixedSizeList`]. /// +/// The underlying data is always stored as a [`ScalarValue::Array`]. +/// /// [`FixedSizeList`]: DType::FixedSizeList #[derive(Debug, Clone)] pub struct ListScalar<'a> { /// The data type of this scalar. dtype: &'a DType, + /// A convenience field so that we do not have to unwrap and check the top-level `dtype` field /// every time we want to access this. element_dtype: &'a Arc, - /// The elements of the list. `None` if the entire list is null. - /// Each element is `Option` where `None` represents a null element within the - /// list. - elements: Option<&'a [Option]>, -} - -impl Display for ListScalar<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.elements { - None => write!(f, "null"), - Some(elems) => { - let type_str: &dyn Display = if let DType::FixedSizeList(_, size, _) = self.dtype { - &format!("fixed_size<{size}>") - } else { - &"" - }; - - write!( - f, - "{type_str}[{}]", - elems - .iter() - .map(|e| Scalar::try_new(self.element_dtype().clone(), e.clone()) - .vortex_expect("`ListScalar` is already a valid `Scalar`")) - .format(", ") - ) - } - } - } -} - -impl PartialEq for ListScalar<'_> { - fn eq(&self, other: &Self) -> bool { - self.dtype.eq_ignore_nullability(other.dtype) && self.elements() == other.elements() - } -} - -impl Eq for ListScalar<'_> {} -/// Ord is not implemented since it's undefined for different element DTypes -impl PartialOrd for ListScalar<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - if !self - .element_dtype - .eq_ignore_nullability(other.element_dtype) - { - return None; - } - self.elements().partial_cmp(&other.elements()) - } -} - -impl Hash for ListScalar<'_> { - fn hash(&self, state: &mut H) { - self.dtype.hash(state); - self.elements().hash(state); - } + /// The underlying array, or `None` if the scalar is null. + array: Option<&'a ArrayRef>, } impl<'a> ListScalar<'a> { @@ -109,7 +62,7 @@ impl<'a> ListScalar<'a> { Ok(Self { dtype, element_dtype, - elements: value.map(|v| v.as_list()), + array: value.map(|v| v.as_list()), }) } @@ -124,22 +77,27 @@ impl<'a> ListScalar<'a> { /// Returns 0 if the list is null. #[inline] pub fn len(&self) -> usize { - self.elements.as_ref().map(|e| e.len()).unwrap_or(0) + self.array.map(|a| a.len()).unwrap_or(0) } /// Returns true if the list has no elements or is null. #[inline] pub fn is_empty(&self) -> bool { - match self.elements.as_ref() { - None => true, - Some(l) => l.is_empty(), - } + self.array.map(|a| a.is_empty()).unwrap_or(true) } /// Returns true if the list is null. #[inline] pub fn is_null(&self) -> bool { - self.elements.is_none() + self.array.is_none() + } + + /// Returns the underlying [`ArrayRef`]. + /// + /// This is useful for bulk operations (e.g., in builders) that can avoid expanding the array + /// into individual scalars. + pub fn as_array(&self) -> Option<&'a ArrayRef> { + self.array } /// Returns the data type of the list's elements. @@ -152,27 +110,40 @@ impl<'a> ListScalar<'a> { /// Returns the element at the given index as a scalar. /// - /// Returns None if the list is null or the index is out of bounds. + /// Returns `None` if the list is null or the index is out of bounds. pub fn element(&self, idx: usize) -> Option { - self.elements.and_then(|l| l.get(idx)).map(|value| { - // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), value.clone()) } - }) + let array = self.array?; + if idx >= array.len() { + return None; + } + + Some( + array + .scalar_at(idx) + .vortex_expect("index already bounds-checked"), + ) } /// Returns all elements in the list as a vector of scalars. /// - /// Returns None if the list is null. + /// Returns `None` if the list is null. + /// + /// This always materializes every element into a `Vec` by calling `Array::scalar_at()` for each + /// element. + /// + /// Prefer [`as_array()`](Self::as_array) when the underlying array can be used directly (e.g., + /// in builders). pub fn elements(&self) -> Option> { - self.elements.map(|elems| { - elems - .iter() - .map(|e| { - // SAFETY: `ListScalar` is already a valid `Scalar`. - unsafe { Scalar::new_unchecked(self.element_dtype().clone(), e.clone()) } + let array = self.array?; + Some( + (0..array.len()) + .map(|i| { + array + .scalar_at(i) + .vortex_expect("index already bounds-checked") }) - .collect_vec() - }) + .collect_vec(), + ) } /// Casts the list to the target [`DType`]. @@ -205,21 +176,114 @@ impl<'a> ListScalar<'a> { ) } - Scalar::try_new( - dtype.clone(), - Some(ScalarValue::List( - self.elements - .ok_or_else(|| vortex_err!("nullness should be handled in Scalar::cast"))? - .iter() - .map(|element| { - // Recursively cast the elements of the list. - Scalar::try_new(DType::clone(self.element_dtype), element.clone())? - .cast(target_element_dtype) - .map(|x| x.into_value()) - }) - .collect::>>>()?, - )), - ) + let array = self + .array + .ok_or_else(|| vortex_err!("nullness should be handled in Scalar::cast"))?; + + // Cast each element and build a new array with the target element dtype. + let casted_elements = (0..array.len()) + .map(|i| { + array + .scalar_at(i) + .vortex_expect("index already bounds-checked") + .cast(target_element_dtype) + }) + .collect::>>()?; + + let casted_array = build_array_from_scalars(target_element_dtype, &casted_elements); + + Scalar::try_new(dtype.clone(), Some(ScalarValue::Array(casted_array))) + } +} + +impl Display for ListScalar<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.elements() { + None => write!(f, "null"), + Some(scalars) => { + let type_str: &dyn Display = if let DType::FixedSizeList(_, size, _) = self.dtype { + &format!("fixed_size<{size}>") + } else { + &"" + }; + + write!(f, "{type_str}[{}]", scalars.iter().format(", ")) + } + } + } +} + +impl PartialEq for ListScalar<'_> { + fn eq(&self, other: &Self) -> bool { + if !self.dtype.eq_ignore_nullability(other.dtype) { + return false; + } + + match (self.array, other.array) { + (None, None) => true, + (Some(a), Some(b)) => { + // Compare element-by-element. + a.len() == b.len() + && (0..a.len()).all(|i| a.scalar_at(i).ok() == b.scalar_at(i).ok()) + } + _ => false, + } + } +} + +impl Eq for ListScalar<'_> {} + +/// Ord is not implemented since it's undefined for different element DTypes. +impl PartialOrd for ListScalar<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + if !self + .element_dtype + .eq_ignore_nullability(other.element_dtype) + { + return None; + } + + match (self.array, other.array) { + (None, None) => Some(Ordering::Equal), + (None, Some(_)) => Some(Ordering::Less), + (Some(_), None) => Some(Ordering::Greater), + (Some(a), Some(b)) => { + let min_len = a.len().min(b.len()); + + // Compute the lexicographic ordering. + for i in 0..min_len { + let a_scalar = a.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + let b_scalar = b.scalar_at(i).vortex_expect( + "something happened with scalar_at in `PartialOrd` of `ListScalar`", + ); + + match a_scalar.partial_cmp(&b_scalar)? { + Ordering::Equal => continue, + ord => return Some(ord), + } + } + + a.len().partial_cmp(&b.len()) + } + } + } +} + +impl Hash for ListScalar<'_> { + fn hash(&self, state: &mut H) { + self.dtype.hash(state); + self.len().hash(state); + + if let Some(array) = self.array { + for i in 0..array.len() { + let scalar = array + .scalar_at(i) + .vortex_expect("something happened with `scalar_at` in `Hash` of `ListScalar`"); + scalar.hash(state); + } + } } } @@ -240,7 +304,8 @@ mod tests { Scalar::primitive(2i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); assert_eq!(list.len(), 3); @@ -277,11 +342,12 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); - // Test element access + // Test element access. let elem0 = list.element(0).unwrap(); assert_eq!(elem0.as_primitive().typed_value::().unwrap(), 10); @@ -291,7 +357,7 @@ mod tests { let elem2 = list.element(2).unwrap(); assert_eq!(elem2.as_primitive().typed_value::().unwrap(), 30); - // Test out of bounds + // Test out of bounds. assert!(list.element(3).is_none()); } @@ -302,7 +368,8 @@ mod tests { Scalar::primitive(100i32, Nullability::NonNullable), Scalar::primitive(200i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); let elements = list.elements().unwrap(); @@ -325,7 +392,8 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); let display = format!("{list}"); @@ -340,13 +408,15 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![ Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -361,13 +431,15 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![ Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(3i32, Nullability::NonNullable), ]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -380,10 +452,12 @@ mod tests { let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)); let children1 = vec![Scalar::primitive(1i32, Nullability::NonNullable)]; - let list_scalar1 = Scalar::list(element_dtype.clone(), children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype.clone(), children1, Nullability::NonNullable); let children2 = vec![Scalar::primitive(2i32, Nullability::NonNullable)]; - let list_scalar2 = Scalar::list(element_dtype, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -397,10 +471,12 @@ mod tests { let element_dtype2 = Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)); let children1 = vec![Scalar::primitive(1i32, Nullability::NonNullable)]; - let list_scalar1 = Scalar::list(element_dtype1, children1, Nullability::NonNullable); + let list_scalar1 = + Scalar::list_from_scalars(element_dtype1, children1, Nullability::NonNullable); let children2 = vec![Scalar::primitive(1i64, Nullability::NonNullable)]; - let list_scalar2 = Scalar::list(element_dtype2, children2, Nullability::NonNullable); + let list_scalar2 = + Scalar::list_from_scalars(element_dtype2, children2, Nullability::NonNullable); let list1 = list_scalar1.as_list(); let list2 = list_scalar2.as_list(); @@ -419,7 +495,8 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); @@ -442,7 +519,8 @@ mod tests { Scalar::primitive(20i32, Nullability::NonNullable), Scalar::primitive(30i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let vec: Vec = Vec::try_from(&list_scalar).unwrap(); assert_eq!(vec, vec![10, 20, 30]); @@ -464,11 +542,12 @@ mod tests { Scalar::primitive(1i32, Nullability::NonNullable), Scalar::primitive(2i32, Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); - // Cast to list with i64 elements + // Cast to list with i64 elements. let target_dtype = DType::List( Arc::new(DType::Primitive(PType::I64, Nullability::NonNullable)), Nullability::NonNullable, @@ -489,7 +568,7 @@ mod tests { let children = vec![ Scalar::primitive(1i64, Nullability::NonNullable), // Wrong type! ]; - Scalar::list(element_dtype, children, Nullability::NonNullable); + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); } #[test] @@ -505,7 +584,8 @@ mod tests { Scalar::utf8("hello".to_string(), Nullability::NonNullable), Scalar::utf8("world".to_string(), Nullability::NonNullable), ]; - let list_scalar = Scalar::list(element_dtype, children, Nullability::NonNullable); + let list_scalar = + Scalar::list_from_scalars(element_dtype, children, Nullability::NonNullable); let list = list_scalar.as_list(); assert_eq!(list.len(), 2); @@ -525,7 +605,7 @@ mod tests { Nullability::NonNullable, )); - let inner_list1 = Scalar::list( + let inner_list1 = Scalar::list_from_scalars( inner_element_dtype.clone(), vec![ Scalar::primitive(1i32, Nullability::NonNullable), @@ -534,7 +614,7 @@ mod tests { Nullability::NonNullable, ); - let inner_list2 = Scalar::list( + let inner_list2 = Scalar::list_from_scalars( inner_element_dtype, vec![ Scalar::primitive(3i32, Nullability::NonNullable), @@ -543,7 +623,7 @@ mod tests { Nullability::NonNullable, ); - let outer_list = Scalar::list( + let outer_list = Scalar::list_from_scalars( inner_list_dtype, vec![inner_list1, inner_list2], Nullability::NonNullable, diff --git a/vortex-array/src/scalar/typed_view/struct_.rs b/vortex-array/src/scalar/typed_view/struct_.rs index ce571350a5b..f9d2f38c18f 100644 --- a/vortex-array/src/scalar/typed_view/struct_.rs +++ b/vortex-array/src/scalar/typed_view/struct_.rs @@ -122,7 +122,7 @@ impl<'a> StructScalar<'a> { Ok(Self { dtype, - fields: value.map(|value| value.as_list()), + fields: value.map(|value| value.as_struct_fields()), }) } @@ -237,7 +237,7 @@ impl<'a> StructScalar<'a> { .map(|s| s.into_value()) }) .collect::>>()?; - Scalar::try_new(dtype.clone(), Some(ScalarValue::List(fields))) + Scalar::try_new(dtype.clone(), Some(ScalarValue::Struct(fields))) } else { Ok(Scalar::null(dtype.clone())) } @@ -262,7 +262,7 @@ impl<'a> StructScalar<'a> { return Ok(Scalar::null(projected_dtype)); }; - let new_fields = ScalarValue::List( + let new_fields = ScalarValue::Struct( projection .iter() .map(|name| { @@ -307,7 +307,7 @@ impl Scalar { } let value_children: Vec<_> = children.into_iter().map(|x| x.into_value()).collect(); - Self::try_new(dtype, Some(ScalarValue::List(value_children))) + Self::try_new(dtype, Some(ScalarValue::Struct(value_children))) .vortex_expect("unable to construct a struct `Scalar`") } @@ -324,7 +324,7 @@ impl Scalar { children: impl IntoIterator, ) -> Self { let value_children: Vec<_> = children.into_iter().map(|s| s.into_value()).collect(); - Self::try_new(dtype, Some(ScalarValue::List(value_children))) + Self::try_new(dtype, Some(ScalarValue::Struct(value_children))) .vortex_expect("unable to construct a struct `Scalar`") } } diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 3fed201ed93..21a06a8de57 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -74,35 +74,35 @@ impl Scalar { ); } DType::List(elem_dtype, _) => { - let ScalarValue::List(elements) = value else { - vortex_bail!("list dtype expected List value, got {value}"); + let ScalarValue::Array(array) = value else { + vortex_bail!("list dtype expected Array value, got {value}"); }; - - for (i, element) in elements.iter().enumerate() { - Self::validate(elem_dtype.as_ref(), element.as_ref()) - .map_err(|e| vortex_error::vortex_err!("list element at index {i}: {e}"))?; - } + vortex_ensure_eq!( + elem_dtype.as_ref(), + array.dtype(), + "list element dtype {elem_dtype} is not compatible with array dtype {}", + array.dtype() + ); } DType::FixedSizeList(elem_dtype, size, _) => { - let ScalarValue::List(elements) = value else { - vortex_bail!("fixed-size list dtype expected List value, got {value}",); + let ScalarValue::Array(array) = value else { + vortex_bail!("fixed-size list dtype expected Array value, got {value}"); }; - - let len = elements.len(); vortex_ensure_eq!( - len, + elem_dtype.as_ref(), + array.dtype(), + "FSL element dtype {elem_dtype} is not compatible with array dtype {}", + array.dtype() + ); + vortex_ensure_eq!( + array.len(), *size as usize, - "fixed-size list dtype expected {size} elements, got {len}", + "fixed-size list expected {size} elements, got {}", + array.len() ); - - for (i, element) in elements.iter().enumerate() { - Self::validate(elem_dtype.as_ref(), element.as_ref()).map_err(|e| { - vortex_error::vortex_err!("fixed-size list element at index {i}: {e}",) - })?; - } } DType::Struct(fields, _) => { - let ScalarValue::List(values) = value else { + let ScalarValue::Struct(values) = value else { vortex_bail!("struct dtype expected List value, got {value}"); }; diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index 3730b13cfe8..8b79d385e23 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -397,7 +397,7 @@ mod tests { ) .unwrap(); - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), vec![3i32.into(), 4i32.into()], Nullability::NonNullable, diff --git a/vortex-array/src/scalar_fn/fns/list_contains/mod.rs b/vortex-array/src/scalar_fn/fns/list_contains/mod.rs index 4bc17f9f89a..c37f46a02c6 100644 --- a/vortex-array/src/scalar_fn/fns/list_contains/mod.rs +++ b/vortex-array/src/scalar_fn/fns/list_contains/mod.rs @@ -612,7 +612,7 @@ mod tests { #[test] pub fn list_falsification() { let expr = list_contains( - lit(Scalar::list( + lit(Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -663,7 +663,7 @@ mod tests { let arr = test_array(); // Both list and needle are constants - should use scalar optimization - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 2.into(), 3.into()], Nullability::NonNullable, @@ -786,7 +786,7 @@ mod tests { #[test] fn test_constant_list() { let list_array = ConstantArray::new( - Scalar::list( + Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1i32.into(), 2i32.into(), 3i32.into()], Nullability::NonNullable, @@ -826,7 +826,7 @@ mod tests { #[test] fn test_list_array_element() { - let list_scalar = Scalar::list( + let list_scalar = Scalar::list_from_scalars( Arc::new(DType::Primitive(I32, Nullability::NonNullable)), vec![1.into(), 3.into(), 6.into()], Nullability::NonNullable, diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index e749e2696c3..da472c4c2c0 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -639,6 +639,32 @@ impl ArrayChildren for ArrayPartsChildren<'_> { } } +// TODO(connor): Do we need this? + +impl TryFrom<&[u8]> for ArrayParts { + type Error = VortexError; + + fn try_from(value: &[u8]) -> Result { + // The final 4 bytes contain the length of the flatbuffer. + if value.len() < 4 { + vortex_bail!("ArrayParts buffer is too short"); + } + + // TODO(connor): Do we care about alignment here? + + let fb_length = u32::try_from_le_bytes(&value[value.len() - 4..])? as usize; + if value.len() < 4 + fb_length { + vortex_bail!("ArrayParts buffer is too short for flatbuffer"); + } + + let fb_offset = value.len() - 4 - fb_length; + let array_tree = ByteBuffer::copy_from(&value[fb_offset..fb_offset + fb_length]); + let segment = BufferHandle::new_host(ByteBuffer::copy_from(&value[0..fb_offset])); + + Self::from_flatbuffer_and_segment(array_tree, segment) + } +} + impl TryFrom for ArrayParts { type Error = VortexError; diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index b0380db5cb3..6f23f1beeb1 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -221,7 +221,7 @@ impl ExpressionConvertor for DefaultExpressionConvertor { }) .try_collect()?; - let list = Scalar::list( + let list = Scalar::list_from_scalars( list_elements[0].dtype().clone(), list_elements, Nullability::Nullable, diff --git a/vortex-duckdb/src/convert/expr.rs b/vortex-duckdb/src/convert/expr.rs index c2d5a37813d..a718c2d52f1 100644 --- a/vortex-duckdb/src/convert/expr.rs +++ b/vortex-duckdb/src/convert/expr.rs @@ -139,7 +139,7 @@ pub fn try_from_bound_expression( else { return Ok(None); }; - let list = Scalar::list( + let list = Scalar::list_from_scalars( Arc::new(list_elements[0].dtype().clone()), list_elements, Nullability::Nullable, diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index 97f788dd39d..8cdac87c5d2 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -321,7 +321,7 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar { Nullable, )), ExtractedValue::List(vs) => match dtype { - DType::List(c, _) => Ok(Scalar::list( + DType::List(c, _) => Ok(Scalar::list_from_scalars( c, vs.into_iter() .map(Scalar::try_from) diff --git a/vortex-duckdb/src/convert/table_filter.rs b/vortex-duckdb/src/convert/table_filter.rs index 702cdef695e..75d79b25c16 100644 --- a/vortex-duckdb/src/convert/table_filter.rs +++ b/vortex-duckdb/src/convert/table_filter.rs @@ -81,7 +81,8 @@ pub fn try_from_table_filter( "IN filter must have at least one value" ); let dtype = scalars[0].dtype().clone(); - let list_scalar = Scalar::list(Arc::new(dtype), scalars, Nullability::Nullable); + let list_scalar = + Scalar::list_from_scalars(Arc::new(dtype), scalars, Nullability::Nullable); list_contains(lit(list_scalar), col.clone()) } TableFilterClass::Dynamic(dynamic) => { diff --git a/vortex-ipc/src/messages/decoder.rs b/vortex-ipc/src/messages/decoder.rs index c61b546506d..9847efdc6f1 100644 --- a/vortex-ipc/src/messages/decoder.rs +++ b/vortex-ipc/src/messages/decoder.rs @@ -44,7 +44,7 @@ pub enum PollRead { Some(DecoderMessage), /// The decoder needs more data to make progress. /// - /// The inner value is the **total*k number of bytes the buffer should contain, not the + /// The inner value is the **total** number of bytes the buffer should contain, not the /// incremental amount needed. Callers should: /// /// 1. Resize the buffer to this length. diff --git a/vortex-proto/proto/scalar.proto b/vortex-proto/proto/scalar.proto index fcc31b25e3e..26ef773b31a 100644 --- a/vortex-proto/proto/scalar.proto +++ b/vortex-proto/proto/scalar.proto @@ -5,17 +5,37 @@ syntax = "proto3"; package vortex.scalar; -option java_package = "dev.vortex.proto"; -option java_outer_classname = "ScalarProtos"; - import "dtype.proto"; import "google/protobuf/struct.proto"; +option java_outer_classname = "ScalarProtos"; +option java_package = "dev.vortex.proto"; + message Scalar { + // The logical data type of this scalar. vortex.dtype.DType dtype = 1; + // The value payload, or absent if the scalar is null (only possible when the dtype is + // nullable). ScalarValue value = 2; } +// The value payload of a `Scalar`. +// +// Each variant corresponds to a physical representation. The correct interpretation depends on the +// accompanying `DType` in the parent `Scalar` message. For example, an `int64_value` may represent +// an i8, i16, i32, or i64 depending on the dtype's `PType`. Deserialization of a `ScalarValue` +// only makes sense when paired with a `DType`. +// +// Primitive integer types are widened to 64-bit (`sint64` for signed, `uint64` for unsigned) since +// protobuf does not have narrower integer fields. The original width is recovered using the dtype +// during deserialization. +// +// The order of this enum is the order in which these types were added. We must maintain it to +// preserve backwards compatibility. +// +// Finally, note that this `ScalarValue` in **not** 1-to-1 with the Vortex `ScalarValue` enum, as +// this protobuf type internally tracks if the value is null. The Vortex `ScalarValue` enum in +// non-nullable, and we reperent a nullable Vortex scalar value as `Option` instead. message ScalarValue { oneof kind { google.protobuf.NullValue null_value = 1; @@ -26,11 +46,37 @@ message ScalarValue { double f64_value = 6; string string_value = 7; bytes bytes_value = 8; - ListValue list_value = 9; + StructValue struct_value = 9; uint64 f16_value = 10; + ArrayValue array_value = 11; } } -message ListValue { - repeated ScalarValue values = 1; +// A struct scalar value that packs several heterogeneous field values together. +// +// Each entry in `fields` corresponds to a field of the parent struct dtype, in order. +// +// NB: This message was previously called `ListValue` and was used for both struct and list / +// fixed-size list scalars. It has since been renamed, and list scalars are now serialized using +// `ArrayValue` instead. +// However, old files may still contain `StructValue` (tag 9) payloads for list dtypes. To maintain +// backwards compatibility, deserializers must check the accompanying dtype and, if it is a list or +// fixed-size list, reconstruct an array from the individual scalar elements. +message StructValue { + // The field values, one per struct field, in dtype field order. + repeated ScalarValue fields = 1; +} + +// A serialized Vortex array, used to represent a single list or fixed-size list scalar. +// +// The array is serialized using `Array::serialize()` and can be reconstructed with +// `ArrayParts::decode()`. The `encoding_ids` list provides the encoding table needed for +// deserialization, and `data` contains the concatenated array buffers. +message ArrayValue { + // The number of elements (rows) in the serialized array. + uint64 row_count = 1; + // Encoding IDs that form the encoding table for deserialization. + repeated string encoding_ids = 2; + // The concatenated serialized array buffers. + bytes data = 3; } diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 303fa403401..a8dbc9fa187 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -1084,6 +1084,8 @@ pub mod vortex_proto::scalar::scalar_value pub enum vortex_proto::scalar::scalar_value::Kind +pub vortex_proto::scalar::scalar_value::Kind::ArrayValue(vortex_proto::scalar::ArrayValue) + pub vortex_proto::scalar::scalar_value::Kind::BoolValue(bool) pub vortex_proto::scalar::scalar_value::Kind::BytesValue(alloc::vec::Vec) @@ -1096,12 +1098,12 @@ pub vortex_proto::scalar::scalar_value::Kind::F64Value(f64) pub vortex_proto::scalar::scalar_value::Kind::Int64Value(i64) -pub vortex_proto::scalar::scalar_value::Kind::ListValue(vortex_proto::scalar::ListValue) - pub vortex_proto::scalar::scalar_value::Kind::NullValue(i32) pub vortex_proto::scalar::scalar_value::Kind::StringValue(alloc::string::String) +pub vortex_proto::scalar::scalar_value::Kind::StructValue(vortex_proto::scalar::StructValue) + pub vortex_proto::scalar::scalar_value::Kind::Uint64Value(u64) impl vortex_proto::scalar::scalar_value::Kind @@ -1126,33 +1128,43 @@ pub fn vortex_proto::scalar::scalar_value::Kind::fmt(&self, f: &mut core::fmt::F impl core::marker::StructuralPartialEq for vortex_proto::scalar::scalar_value::Kind -pub struct vortex_proto::scalar::ListValue +pub struct vortex_proto::scalar::ArrayValue + +pub vortex_proto::scalar::ArrayValue::data: alloc::vec::Vec + +pub vortex_proto::scalar::ArrayValue::encoding_ids: alloc::vec::Vec + +pub vortex_proto::scalar::ArrayValue::row_count: u64 -pub vortex_proto::scalar::ListValue::values: alloc::vec::Vec +impl core::clone::Clone for vortex_proto::scalar::ArrayValue -impl core::clone::Clone for vortex_proto::scalar::ListValue +pub fn vortex_proto::scalar::ArrayValue::clone(&self) -> vortex_proto::scalar::ArrayValue -pub fn vortex_proto::scalar::ListValue::clone(&self) -> vortex_proto::scalar::ListValue +impl core::cmp::Eq for vortex_proto::scalar::ArrayValue -impl core::cmp::PartialEq for vortex_proto::scalar::ListValue +impl core::cmp::PartialEq for vortex_proto::scalar::ArrayValue -pub fn vortex_proto::scalar::ListValue::eq(&self, other: &vortex_proto::scalar::ListValue) -> bool +pub fn vortex_proto::scalar::ArrayValue::eq(&self, other: &vortex_proto::scalar::ArrayValue) -> bool -impl core::default::Default for vortex_proto::scalar::ListValue +impl core::default::Default for vortex_proto::scalar::ArrayValue -pub fn vortex_proto::scalar::ListValue::default() -> Self +pub fn vortex_proto::scalar::ArrayValue::default() -> Self -impl core::fmt::Debug for vortex_proto::scalar::ListValue +impl core::fmt::Debug for vortex_proto::scalar::ArrayValue -pub fn vortex_proto::scalar::ListValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_proto::scalar::ArrayValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -impl core::marker::StructuralPartialEq for vortex_proto::scalar::ListValue +impl core::hash::Hash for vortex_proto::scalar::ArrayValue -impl prost::message::Message for vortex_proto::scalar::ListValue +pub fn vortex_proto::scalar::ArrayValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) -pub fn vortex_proto::scalar::ListValue::clear(&mut self) +impl core::marker::StructuralPartialEq for vortex_proto::scalar::ArrayValue -pub fn vortex_proto::scalar::ListValue::encoded_len(&self) -> usize +impl prost::message::Message for vortex_proto::scalar::ArrayValue + +pub fn vortex_proto::scalar::ArrayValue::clear(&mut self) + +pub fn vortex_proto::scalar::ArrayValue::encoded_len(&self) -> usize pub struct vortex_proto::scalar::Scalar @@ -1211,3 +1223,31 @@ impl prost::message::Message for vortex_proto::scalar::ScalarValue pub fn vortex_proto::scalar::ScalarValue::clear(&mut self) pub fn vortex_proto::scalar::ScalarValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::StructValue + +pub vortex_proto::scalar::StructValue::fields: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::clone(&self) -> vortex_proto::scalar::StructValue + +impl core::cmp::PartialEq for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::eq(&self, other: &vortex_proto::scalar::StructValue) -> bool + +impl core::default::Default for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::StructValue + +impl prost::message::Message for vortex_proto::scalar::StructValue + +pub fn vortex_proto::scalar::StructValue::clear(&mut self) + +pub fn vortex_proto::scalar::StructValue::encoded_len(&self) -> usize diff --git a/vortex-proto/src/generated/vortex.scalar.rs b/vortex-proto/src/generated/vortex.scalar.rs index 9f4e3047e54..dbdbf272a67 100644 --- a/vortex-proto/src/generated/vortex.scalar.rs +++ b/vortex-proto/src/generated/vortex.scalar.rs @@ -1,14 +1,34 @@ // This file is @generated by prost-build. #[derive(Clone, PartialEq, ::prost::Message)] pub struct Scalar { + /// The logical data type of this scalar. #[prost(message, optional, tag = "1")] pub dtype: ::core::option::Option, + /// The value payload, or absent if the scalar is null (only possible when the dtype is + /// nullable). #[prost(message, optional, tag = "2")] pub value: ::core::option::Option, } +/// The value payload of a `Scalar`. +/// +/// Each variant corresponds to a physical representation. The correct interpretation depends on the +/// accompanying `DType` in the parent `Scalar` message. For example, an `int64_value` may represent +/// an i8, i16, i32, or i64 depending on the dtype's `PType`. Deserialization of a `ScalarValue` +/// only makes sense when paired with a `DType`. +/// +/// Primitive integer types are widened to 64-bit (`sint64` for signed, `uint64` for unsigned) since +/// protobuf does not have narrower integer fields. The original width is recovered using the dtype +/// during deserialization. +/// +/// The order of this enum is the order in which these types were added. We must maintain it to +/// preserve backwards compatibility. +/// +/// Finally, note that this `ScalarValue` in **not** 1-to-1 with the Vortex `ScalarValue` enum, as +/// this protobuf type internally tracks if the value is null. The Vortex `ScalarValue` enum in +/// non-nullable, and we reperent a nullable Vortex scalar value as `Option` instead. #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarValue { - #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] pub kind: ::core::option::Option, } /// Nested message and enum types in `ScalarValue`. @@ -32,13 +52,43 @@ pub mod scalar_value { #[prost(bytes, tag = "8")] BytesValue(::prost::alloc::vec::Vec), #[prost(message, tag = "9")] - ListValue(super::ListValue), + StructValue(super::StructValue), #[prost(uint64, tag = "10")] F16Value(u64), + #[prost(message, tag = "11")] + ArrayValue(super::ArrayValue), } } +/// A struct scalar value that packs several heterogeneous field values together. +/// +/// Each entry in `fields` corresponds to a field of the parent struct dtype, in order. +/// +/// NB: This message was previously called `ListValue` and was used for both struct and list / +/// fixed-size list scalars. It has since been renamed, and list scalars are now serialized using +/// `ArrayValue` instead. +/// However, old files may still contain `StructValue` (tag 9) payloads for list dtypes. To maintain +/// backwards compatibility, deserializers must check the accompanying dtype and, if it is a list or +/// fixed-size list, reconstruct an array from the individual scalar elements. #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListValue { +pub struct StructValue { + /// The field values, one per struct field, in dtype field order. #[prost(message, repeated, tag = "1")] - pub values: ::prost::alloc::vec::Vec, + pub fields: ::prost::alloc::vec::Vec, +} +/// A serialized Vortex array, used to represent a single list or fixed-size list scalar. +/// +/// The array is serialized using `Array::serialize()` and can be reconstructed with +/// `ArrayParts::decode()`. The `encoding_ids` list provides the encoding table needed for +/// deserialization, and `data` contains the concatenated array buffers. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ArrayValue { + /// The number of elements (rows) in the serialized array. + #[prost(uint64, tag = "1")] + pub row_count: u64, + /// Encoding IDs that form the encoding table for deserialization. + #[prost(string, repeated, tag = "2")] + pub encoding_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// The concatenated serialized array buffers. + #[prost(bytes = "vec", tag = "3")] + pub data: ::prost::alloc::vec::Vec, } diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index d4a0e000cf1..ace38abf9f7 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -183,7 +183,7 @@ fn scalar_helper_inner(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyRes .iter() .map(|e| scalar_helper_inner(&e, Some(element_dtype))) .try_collect()?; - Scalar::list(element_dtype.clone(), elements, Nullability::NonNullable); + Scalar::list_from_scalars(element_dtype.clone(), elements, Nullability::NonNullable); } else { // If no dtype was provided, we need to infer the element dtype from the list contents. // We do this in a greedy way taking the first element dtype we find. @@ -198,7 +198,7 @@ fn scalar_helper_inner(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyRes elements.push(scalar); } - return Ok(Scalar::list( + return Ok(Scalar::list_from_scalars( element_dtype .map(Arc::new) // Empty list defaults to Null dtype