Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl USearchIndexConfig {
.map_err(|e| DataFusionError::Execution(format!("USearch Index::new failed: {e}")))
}

/// Load a previously saved index from `path`.
/// Load a previously saved index from `path` into memory.
///
/// Uses the same `IndexOptions` as `build_index()`. The options must
/// match those used when the index was originally built — passing wrong
Expand All @@ -101,6 +101,26 @@ impl USearchIndexConfig {
Ok(index)
}

/// Open a previously saved index at `path` as a memory-mapped view.
///
/// In contrast to [`load_index`], the index contents are not copied into
/// RAM up front: the OS pages data in on demand, so resident memory tracks
/// the working set instead of the full index size. This is the preferred
/// route when reloading from disk and the index file is already local.
///
/// The [`Index`] handed back is fully usable for search. The file behind
/// it has to stay on disk for as long as the index is alive.
///
/// # Errors
///
/// Returns `DataFusionError::Execution` when the empty index cannot be
/// constructed from this config's options, or when viewing `path` fails.
///
/// [`load_index`]: Self::load_index
pub fn view_index(&self, path: &str) -> Result<Index> {
    // Same options as the build path; the view is attached to a fresh index.
    let options = self.to_index_options();
    let mapped = Index::new(&options)
        .map_err(|e| DataFusionError::Execution(format!("USearch Index::new failed: {e}")))?;

    match mapped.view(path) {
        Ok(_) => Ok(mapped),
        Err(e) => Err(DataFusionError::Execution(format!(
            "USearch index view failed: {e}"
        ))),
    }
}

fn to_index_options(&self) -> IndexOptions {
IndexOptions {
dimensions: self.dimensions,
Expand Down
6 changes: 6 additions & 0 deletions src/sqlite_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ impl SqliteLookupProvider {
)?;
}

// Checkpoint WAL so the data is flushed to the main database file.
// Without this, data written during build may only exist in the WAL
// and can be lost if the process exits before a passive checkpoint.
conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")
.map_err(|e| DataFusionError::Execution(format!("WAL checkpoint failed: {e}")))?;

let mut conns = vec![conn];
for _ in 1..pool_size {
conns.push(open_conn(db_path)?);
Expand Down
49 changes: 49 additions & 0 deletions tests/optimizer_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,3 +419,52 @@ async fn test_qualified_ref_where_clause_rewrites() {
"qualified ref + WHERE → filter absorbed, rule must fire\nPlan: {plan:?}"
);
}

// ═══════════════════════════════════════════════════════════════════════════════
// SELECT only distance — no base columns projected
// ═══════════════════════════════════════════════════════════════════════════════
//
// When the SELECT list contains only the distance UDF (no base table columns),
// the Projection node has a single computed expression. The optimizer must still
// recognise the pattern and rewrite to USearchNode.

/// Unqualified `items` table: the projection is only the aliased distance
/// and ORDER BY refers to that alias. The optimized plan must contain a
/// USearch node.
#[tokio::test]
async fn test_select_only_distance_bare_rewrites() {
    let session = make_ctx(MetricKind::L2sq).await;
    let query =
        format!("SELECT l2_distance(vector, {Q}) AS dist FROM items ORDER BY dist ASC LIMIT 5");

    let plan = optimized_plan(&session, &query).await;
    let rule_fired = contains_usearch_node(&plan);

    assert!(
        rule_fired,
        "SELECT only distance (bare) → rule must fire\nPlan: {plan:?}"
    );
}

/// Fully qualified `datafusion.public.items` table: the projection is only
/// the aliased distance and ORDER BY refers to that alias. The optimized
/// plan must contain a USearch node.
#[tokio::test]
async fn test_select_only_distance_qualified_rewrites() {
    let session = make_ctx_qualified(MetricKind::L2sq).await;
    let query = format!(
        "SELECT l2_distance(vector, {Q}) AS dist FROM datafusion.public.items ORDER BY dist ASC LIMIT 5"
    );

    let plan = optimized_plan(&session, &query).await;
    let rule_fired = contains_usearch_node(&plan);

    assert!(
        rule_fired,
        "SELECT only distance (qualified) → rule must fire\nPlan: {plan:?}"
    );
}

/// Unqualified `items` table: the projection is the distance UDF without an
/// alias, and ORDER BY repeats the same UDF call. The optimized plan must
/// contain a USearch node.
#[tokio::test]
async fn test_select_only_distance_no_alias_rewrites() {
    let session = make_ctx(MetricKind::L2sq).await;
    let query = format!(
        "SELECT l2_distance(vector, {Q}) FROM items ORDER BY l2_distance(vector, {Q}) ASC LIMIT 5"
    );

    let plan = optimized_plan(&session, &query).await;
    let rule_fired = contains_usearch_node(&plan);

    assert!(
        rule_fired,
        "SELECT only distance (no alias, ORDER BY UDF) → rule must fire\nPlan: {plan:?}"
    );
}
Loading