-
Notifications
You must be signed in to change notification settings - Fork 0
API for separate planning and execution #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
09df555
fc098f4
7848ea2
0e6ab05
2b224fb
d6ce623
30c82a6
dc55dda
005dde8
4857d08
3ba9519
a013f7d
61d55fe
e91242e
ca76405
f2d20d4
8880576
b3024cd
d1ce978
e0ca9bc
7989f7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| [package] | ||
| name = "iceberg_rust_ffi" | ||
| version = "0.7.15" | ||
| version = "0.7.16" | ||
| edition = "2021" | ||
|
|
||
| [lib] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,22 +1,44 @@ | ||
| use std::ffi::{c_char, c_void, CStr}; | ||
| use std::ptr; | ||
|
|
||
| use futures::stream; | ||
| use futures::{StreamExt, TryStreamExt}; | ||
| use iceberg::arrow::ArrowReaderBuilder; | ||
| use iceberg::io::FileIO; | ||
| use iceberg::scan::{TableScan, TableScanBuilder}; | ||
| use object_store_ffi::{ | ||
| export_runtime_op, with_cancellation, CResult, NotifyGuard, ResponseGuard, RT, | ||
| }; | ||
| use tokio::sync::Mutex as AsyncMutex; | ||
|
|
||
| use crate::scan_common::*; | ||
| use crate::{IcebergArrowStream, IcebergArrowStreamResponse, IcebergTable}; | ||
| use crate::{ | ||
| IcebergArrowStream, IcebergArrowStreamResponse, IcebergFileScanTaskStreamResponse, | ||
| IcebergNextFileScanTaskResponse, IcebergTable, | ||
| }; | ||
|
|
||
| /// Struct for regular (full) scan builder and scan | ||
| /// FFI wrapper for a regular (full) table scan. | ||
| /// | ||
| /// In addition to the builder/scan from iceberg-rust, we store copies of | ||
| /// file_io, batch_size, and data_file_concurrency_limit so that | ||
| /// `create_reader` can build an ArrowReaderBuilder without accessing | ||
| /// private fields on TableScan. | ||
| #[repr(C)] | ||
| pub struct IcebergScan { | ||
| pub builder: Option<TableScanBuilder<'static>>, | ||
| pub scan: Option<TableScan>, | ||
| /// 0 = auto-detect (num_cpus) | ||
| pub serialization_concurrency: usize, | ||
| /// Cloned from Table at scan creation time for use by create_reader | ||
| pub file_io: Option<FileIO>, | ||
| /// 0 = no batch size override (use reader default) | ||
| pub batch_size: usize, | ||
| /// 0 = auto-detect (num_cpus) | ||
| pub data_file_concurrency_limit: usize, | ||
| /// 0 = use DEFAULT_PREFETCH_DEPTH (for batch prefetch) | ||
| pub prefetch_depth: usize, | ||
| /// 0 = use DEFAULT_TASK_PREFETCH_DEPTH (for task planning prefetch) | ||
| pub task_prefetch_depth: usize, | ||
| } | ||
|
|
||
| unsafe impl Send for IcebergScan {} | ||
|
|
@@ -28,23 +50,46 @@ pub extern "C" fn iceberg_new_scan(table: *mut IcebergTable) -> *mut IcebergScan | |
| return ptr::null_mut(); | ||
| } | ||
| let table_ref = unsafe { &*table }; | ||
| // Clone FileIO for later use by create_reader. | ||
| // FileIO is Arc-based internally so this is cheap. | ||
| let file_io = table_ref.table.file_io().clone(); | ||
| let scan_builder = table_ref.table.scan(); | ||
| Box::into_raw(Box::new(IcebergScan { | ||
| builder: Some(scan_builder), | ||
| scan: None, | ||
| serialization_concurrency: 0, | ||
| file_io: Some(file_io), | ||
| batch_size: 0, | ||
| data_file_concurrency_limit: 0, | ||
| prefetch_depth: 0, | ||
| task_prefetch_depth: 0, | ||
| })) | ||
| } | ||
|
|
||
| // Use macros from scan_common for shared functionality | ||
| impl_select_columns!(iceberg_select_columns, IcebergScan); | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_data_file_concurrency_limit, | ||
| IcebergScan, | ||
| with_data_file_concurrency_limit, | ||
| n: usize | ||
| ); | ||
| /// Sets data file concurrency and stores the value for create_reader. | ||
| #[no_mangle] | ||
| pub extern "C" fn iceberg_scan_with_data_file_concurrency_limit( | ||
| scan: &mut *mut IcebergScan, | ||
| n: usize, | ||
| ) -> CResult { | ||
| if scan.is_null() || (*scan).is_null() { | ||
| return CResult::Error; | ||
| } | ||
| let mut scan_val = *unsafe { Box::from_raw(*scan) }; | ||
| if scan_val.builder.is_none() { | ||
| *scan = Box::into_raw(Box::new(scan_val)); | ||
| return CResult::Error; | ||
| } | ||
| scan_val.builder = scan_val | ||
| .builder | ||
| .map(|b| b.with_data_file_concurrency_limit(n)); | ||
| scan_val.data_file_concurrency_limit = n; | ||
| *scan = Box::into_raw(Box::new(scan_val)); | ||
| CResult::Ok | ||
| } | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_manifest_file_concurrency_limit, | ||
|
|
@@ -73,6 +118,16 @@ impl_with_serialization_concurrency_limit!( | |
| IcebergScan | ||
| ); | ||
|
|
||
| impl_with_prefetch_depth!( | ||
| iceberg_scan_with_prefetch_depth, | ||
| IcebergScan | ||
| ); | ||
|
|
||
| impl_with_task_prefetch_depth!( | ||
| iceberg_scan_with_task_prefetch_depth, | ||
| IcebergScan | ||
| ); | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_snapshot_id, | ||
| IcebergScan, | ||
|
|
@@ -104,11 +159,17 @@ export_runtime_op!( | |
| serialization_concurrency | ||
| }; | ||
|
|
||
| Ok((scan_ref.as_ref().unwrap(), serialization_concurrency)) | ||
| let prefetch_depth = if scan_ptr.prefetch_depth == 0 { | ||
| crate::table::DEFAULT_PREFETCH_DEPTH | ||
| } else { | ||
| scan_ptr.prefetch_depth | ||
| }; | ||
|
|
||
| Ok((scan_ref.as_ref().unwrap(), serialization_concurrency, prefetch_depth)) | ||
| }, | ||
| result_tuple, | ||
| async { | ||
| let (scan_ref, serialization_concurrency) = result_tuple; | ||
| let (scan_ref, serialization_concurrency, prefetch_depth) = result_tuple; | ||
|
|
||
| let stream = scan_ref.to_arrow().await?; | ||
|
|
||
|
|
@@ -118,11 +179,189 @@ export_runtime_op!( | |
| serialization_concurrency | ||
| ); | ||
|
|
||
| Ok::<IcebergArrowStream, anyhow::Error>(IcebergArrowStream { | ||
| stream: AsyncMutex::new(serialized_stream), | ||
| }) | ||
| Ok::<IcebergArrowStream, anyhow::Error>(IcebergArrowStream::new( | ||
| serialized_stream, | ||
| prefetch_depth, | ||
| )) | ||
| }, | ||
| scan: *mut IcebergScan | ||
| ); | ||
|
|
||
| impl_scan_free!(iceberg_scan_free, IcebergScan); | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Split-scan API: plan_files → create_reader → next_task → read_task | ||
| // | ||
| // This separates scan planning from reading, allowing multiple consumers | ||
| // to concurrently pull tasks from a shared stream and read them with a | ||
| // shared ArrowReader (which caches loaded delete files via Arc). | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| // Async: plan which files to read. Returns a shared task stream. | ||
| // Multiple consumers can call next_task on the same stream concurrently. | ||
| export_runtime_op!( | ||
| iceberg_plan_files, | ||
| IcebergFileScanTaskStreamResponse, | ||
| || { | ||
| if scan.is_null() { | ||
| return Err(anyhow::anyhow!("Null scan pointer provided")); | ||
| } | ||
| let scan_ptr = unsafe { &*scan }; | ||
| let scan_ref = scan_ptr.scan.as_ref() | ||
| .ok_or_else(|| anyhow::anyhow!("Scan not built — call build! first"))?; | ||
|
|
||
| let task_prefetch_depth = if scan_ptr.task_prefetch_depth == 0 { | ||
| crate::table::DEFAULT_TASK_PREFETCH_DEPTH | ||
| } else { | ||
| scan_ptr.task_prefetch_depth | ||
| }; | ||
|
|
||
| Ok((scan_ref, task_prefetch_depth)) | ||
| }, | ||
| result_tuple, | ||
| async { | ||
| let (scan_ref, task_prefetch_depth) = result_tuple; | ||
| let stream = scan_ref.plan_files().await?; | ||
| Ok::<crate::IcebergFileScanTaskStream, anyhow::Error>( | ||
| crate::IcebergFileScanTaskStream::new(stream, task_prefetch_depth) | ||
| ) | ||
| }, | ||
| scan: *mut IcebergScan | ||
| ); | ||
|
|
||
| /// Sync: create a shared ArrowReader context from the scan's configuration. | ||
| /// | ||
| /// The returned context should be passed to every read_task call. | ||
| /// Cloning the inner ArrowReader is cheap and shares the delete-file cache. | ||
| /// | ||
| /// For full scans, row_group_filtering is enabled and row_selection is | ||
| /// disabled, matching the defaults in TableScan::to_arrow(). | ||
| /// `reader_concurrency`: data-file concurrency for the reader (0 = use scan default). | ||
| /// This overrides the scan-level `data_file_concurrency_limit` when set, | ||
| /// allowing the per-task reader to use a different parallelism than the scan planner. | ||
| #[no_mangle] | ||
| pub extern "C" fn iceberg_create_reader( | ||
| scan: *mut IcebergScan, | ||
| reader_concurrency: usize, | ||
| ) -> *mut crate::IcebergArrowReaderContext { | ||
| if scan.is_null() { | ||
| return std::ptr::null_mut(); | ||
| } | ||
| let scan_ptr = unsafe { &*scan }; | ||
|
|
||
| let Some(file_io) = scan_ptr.file_io.as_ref() else { | ||
| return std::ptr::null_mut(); | ||
| }; | ||
|
|
||
| let mut builder = ArrowReaderBuilder::new(file_io.clone()) | ||
| .with_row_group_filtering_enabled(true) | ||
| .with_row_selection_enabled(false); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually there's iceberg_create_incremental_reader |
||
|
|
||
| // reader_concurrency > 0 overrides the scan-level setting | ||
| let data_file_concurrency = if reader_concurrency > 0 { | ||
| reader_concurrency | ||
| } else { | ||
| scan_ptr.data_file_concurrency_limit | ||
| }; | ||
| if data_file_concurrency > 0 { | ||
| builder = builder.with_data_file_concurrency_limit(data_file_concurrency); | ||
| } | ||
| if scan_ptr.batch_size > 0 { | ||
| builder = builder.with_batch_size(scan_ptr.batch_size); | ||
| } | ||
|
|
||
| let serialization_concurrency = if scan_ptr.serialization_concurrency == 0 { | ||
| std::thread::available_parallelism() | ||
| .map(|n| n.get()) | ||
| .unwrap_or(1) | ||
| } else { | ||
| scan_ptr.serialization_concurrency | ||
| }; | ||
|
|
||
| let prefetch_depth = if scan_ptr.prefetch_depth == 0 { | ||
| crate::table::DEFAULT_PREFETCH_DEPTH | ||
| } else { | ||
| scan_ptr.prefetch_depth | ||
| }; | ||
|
|
||
| Box::into_raw(Box::new(crate::IcebergArrowReaderContext { | ||
| reader: builder.build(), | ||
| serialization_concurrency, | ||
| prefetch_depth, | ||
| })) | ||
| } | ||
|
|
||
| // Async: pull the next file scan task from the stream. | ||
| // Returns null payload when the stream is exhausted (end of planning). | ||
| // Safe to call concurrently from multiple consumers — the stream is | ||
| // behind an AsyncMutex. | ||
| export_runtime_op!( | ||
| iceberg_next_file_scan_task, | ||
| IcebergNextFileScanTaskResponse, | ||
| || { | ||
| if task_stream.is_null() { | ||
| return Err(anyhow::anyhow!("Null task stream pointer")); | ||
| } | ||
| let stream_ref = unsafe { &*task_stream }; | ||
| Ok(stream_ref) | ||
| }, | ||
| stream_ref, | ||
| async { | ||
| let result: Result<Option<crate::IcebergFileScanTask>, anyhow::Error> = match stream_ref.next().await? { | ||
| Some(task) => Ok(Some(crate::IcebergFileScanTask { task })), | ||
| None => Ok(None), | ||
| }; | ||
| result | ||
| }, | ||
| task_stream: *mut crate::IcebergFileScanTaskStream | ||
| ); | ||
|
|
||
| // Async: read a single file scan task into an Arrow stream. | ||
| // | ||
| // Clones the ArrowReader from the shared context — this shares the | ||
| // CachingDeleteFileLoader cache across all concurrent consumers. | ||
| // The task is wrapped in a one-element stream and fed to | ||
| // ArrowReader::read(). | ||
| export_runtime_op!( | ||
| iceberg_read_file_scan_task, | ||
| IcebergArrowStreamResponse, | ||
| || { | ||
| if reader_ctx.is_null() { | ||
| return Err(anyhow::anyhow!("Null reader context pointer")); | ||
| } | ||
| if task.is_null() { | ||
| return Err(anyhow::anyhow!("Null task pointer")); | ||
| } | ||
| let ctx = unsafe { &*reader_ctx }; | ||
| // Clone the reader — cheap, shares delete-file cache via Arc | ||
| let reader = ctx.reader.clone(); | ||
| let serialization_concurrency = ctx.serialization_concurrency; | ||
| let prefetch_depth = ctx.prefetch_depth; | ||
|
|
||
| // Consume the task — caller must NOT call free_task after this | ||
| let task_ref = unsafe { Box::from_raw(task) }; | ||
| let file_scan_task = task_ref.task; | ||
|
|
||
| Ok((reader, serialization_concurrency, prefetch_depth, file_scan_task)) | ||
| }, | ||
| result_tuple, | ||
| async { | ||
| let (reader, serialization_concurrency, prefetch_depth, file_scan_task) = result_tuple; | ||
|
|
||
| // Wrap single task in a one-element stream for ArrowReader::read() | ||
| let task_stream = stream::once(async { Ok(file_scan_task) }).boxed(); | ||
| let record_batch_stream = reader.read(task_stream)?; | ||
|
|
||
| let serialized_stream = crate::transform_stream_with_parallel_serialization( | ||
| record_batch_stream, | ||
| serialization_concurrency, | ||
| ); | ||
|
|
||
| Ok::<crate::IcebergArrowStream, anyhow::Error>(crate::IcebergArrowStream::new( | ||
| serialized_stream, | ||
| prefetch_depth, | ||
| )) | ||
| }, | ||
| reader_ctx: *mut crate::IcebergArrowReaderContext, | ||
| task: *mut crate::IcebergFileScanTask | ||
| ); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Claude also mentioned this:
in the iceberg-rust repo, in s3.rs. I haven't noticed that this helps, though.