Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a7e220f
Add arrow fetch support
ffelixg Nov 30, 2025
68482fb
Copilot suggestion: Fix typo
ffelixg Nov 30, 2025
3645578
Copilot suggestion: Fix missing buffer resize
ffelixg Nov 30, 2025
9c8c3e8
Copilot suggestion: Initialize bool value buffer
ffelixg Nov 30, 2025
5267a33
Add test for long data
ffelixg Nov 30, 2025
b81f245
Copilot suggestion: Uppercase uuids
ffelixg Nov 30, 2025
532672c
Copilot suggestion: use new for batch schema format/name
ffelixg Nov 30, 2025
590fdf6
Replace free calls in release callbacks with unique pointers tracked …
ffelixg Dec 1, 2025
ad188bd
Eliminate potential memory leaks on allocation failures when transfer…
ffelixg Dec 6, 2025
d2c4881
Check returncode for SQLGetData
ffelixg Dec 6, 2025
05c204a
Fix null count array attribute
ffelixg Dec 6, 2025
582f366
Replace __int128_t by custom Int128_t for compatibility
ffelixg Dec 9, 2025
65c66be
Vendor days_from_civil to replace std::mktime
ffelixg Dec 9, 2025
62056c8
Expand test to make sure datetimeoffset via SQLGetData is covered
ffelixg Dec 9, 2025
0fbaabc
Merge branch 'main' into arrow_fetch
gargsaumya Jan 6, 2026
b4c44fd
Fix compilation on windows
ffelixg Jan 6, 2026
f0bca7b
Merge branch 'main' into arrow_fetch
bewithgaurav Jan 7, 2026
ef91fd9
Merge remote-tracking branch 'origin/main' into arrow_fetch
ffelixg Feb 9, 2026
9edb495
Fix SQL_REAL handling
ffelixg Feb 9, 2026
3b0cfc7
Fix rownumber -> behaves like fetchmany
ffelixg Feb 9, 2026
ba172f0
Check for closed cursor before pyarrow import
ffelixg Feb 9, 2026
554b93b
Add pyarrow as an optional dependency
ffelixg Feb 9, 2026
c0cee4a
Switch to large_string/large_binary for variable length data
ffelixg Feb 9, 2026
4b8d7b5
Move tests to separate file as requested
ffelixg Feb 9, 2026
042605e
Merge remote-tracking branch 'origin/main' into arrow_fetch
ffelixg Mar 4, 2026
e997c9b
Unbind & test in accordance with #441
ffelixg Mar 4, 2026
97959ce
Add UDT support like #423
ffelixg Mar 8, 2026
8b8d77c
Update bound varchar buffer size according to #444
ffelixg Mar 18, 2026
6a3190e
Factor pyarrow import out into _check_closed
ffelixg Mar 18, 2026
e2d9a9f
Add arrow fetch methods to stubs
ffelixg Mar 22, 2026
966ad97
Update GetDataVar error messages
ffelixg Mar 22, 2026
ec11800
Update ROW_ARRAY_SIZE for final batch instead of finding gcd & wrap i…
ffelixg Mar 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions mssql_python/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
)

if TYPE_CHECKING:
import pyarrow # type: ignore
from mssql_python.connection import Connection
else:
pyarrow = None

# Constants for string handling
MAX_INLINE_CHAR: int = (
Expand Down Expand Up @@ -771,6 +774,18 @@ def _check_closed(self) -> None:
ddbc_error="",
)

def _ensure_pyarrow(self) -> Any:
"""
Import and return pyarrow or raise ImportError accordingly.
"""
try:
import pyarrow
return pyarrow
except ImportError as e:
raise ImportError(
"pyarrow is required for Arrow fetch methods. Please install pyarrow."
) from e

def setinputsizes(self, sizes: List[Union[int, tuple]]) -> None:
"""
Sets the type information to be used for parameters in execute and executemany.
Expand Down Expand Up @@ -2447,6 +2462,94 @@ def fetchall(self) -> List[Row]:
# On error, don't increment rownumber - rethrow the error
raise e

def arrow_batch(self, batch_size: int = 8192) -> "pyarrow.RecordBatch":
    """
    Fetch one pyarrow RecordBatch of at most ``batch_size`` rows from the
    current query result set.

    Args:
        batch_size: Maximum number of rows to fetch in the Record Batch.

    Returns:
        A pyarrow RecordBatch object containing up to batch_size rows.
    """
    self._check_closed()  # Raise if the cursor has already been closed
    pyarrow = self._ensure_pyarrow()

    if self.description and not self._has_result_set:
        self._reset_rownumber()

    # The native binding fills this list with Arrow C-interface capsules
    # (schema + array) describing the fetched batch.
    capsule_parts: list = []
    return_code = ddbc_bindings.DDBCSQLFetchArrowBatch(
        self.hstmt, capsule_parts, max(batch_size, 0)
    )
    check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, return_code)

    record_batch = pyarrow.RecordBatch._import_from_c_capsule(*capsule_parts)

    if self.hstmt:
        self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))

    fetched = record_batch.num_rows
    if self._has_result_set and fetched > 0:
        # Advance the cursor position; rownumber points at the last row read.
        self._next_row_index += fetched
        self._rownumber = self._next_row_index - 1

    # Centralized rowcount assignment: 0 when nothing has been consumed yet,
    # otherwise the running total of rows fetched so far.
    if fetched == 0 and self._next_row_index == 0:
        self.rowcount = 0
    else:
        self.rowcount = self._next_row_index

    return record_batch

def arrow(self, batch_size: int = 8192) -> "pyarrow.Table":
    """
    Fetch the entire result as a pyarrow Table.

    Args:
        batch_size: Size of the Record Batches which make up the Table.

    Returns:
        A pyarrow Table containing all remaining rows from the result set.
    """
    self._check_closed()  # Raise if the cursor has already been closed
    pyarrow = self._ensure_pyarrow()

    collected: list["pyarrow.RecordBatch"] = []
    while True:
        current = self.arrow_batch(batch_size)
        # A short batch (or a non-positive batch_size) marks the end of
        # the result set.
        is_last = batch_size <= 0 or current.num_rows < batch_size
        if not is_last:
            collected.append(current)
            continue
        # Keep the final batch only when it carries rows, or when it is
        # the sole batch (so an empty result still yields a schema).
        if current.num_rows > 0 or not collected:
            collected.append(current)
        break
    return pyarrow.Table.from_batches(collected, schema=collected[0].schema)

def arrow_reader(self, batch_size: int = 8192) -> "pyarrow.RecordBatchReader":
    """
    Fetch the result as a pyarrow RecordBatchReader, which yields Record
    Batches of the specified size until the current result set is
    exhausted.

    Args:
        batch_size: Size of the Record Batches produced by the reader.

    Returns:
        A pyarrow RecordBatchReader for the result set.
    """
    self._check_closed()  # Raise if the cursor has already been closed
    pyarrow = self._ensure_pyarrow()

    # A zero-sized fetch returns an empty batch that still carries the
    # result schema, without consuming any rows from the cursor.
    schema = self.arrow_batch(0).schema

    def _iter_batches():
        # Pull batches lazily; an empty batch signals exhaustion.
        while True:
            chunk = self.arrow_batch(batch_size)
            if chunk.num_rows == 0:
                return
            yield chunk

    return pyarrow.RecordBatchReader.from_batches(schema, _iter_batches())

def nextset(self) -> Union[bool, None]:
"""
Skip to the next available result set.
Expand Down
6 changes: 6 additions & 0 deletions mssql_python/mssql_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Type stubs for mssql_python package - based on actual public API
from typing import Any, Dict, List, Optional, Union, Tuple, Sequence, Callable, Iterator
import datetime
import logging
import pyarrow

# GLOBALS - DB-API 2.0 Required Module Globals
# https://www.python.org/dev/peps/pep-0249/#module-interface
Expand Down Expand Up @@ -209,6 +210,11 @@ class Cursor:
def setinputsizes(self, sizes: List[Union[int, Tuple[Any, ...]]]) -> None: ...
def setoutputsize(self, size: int, column: Optional[int] = None) -> None: ...

# Arrow Extension Methods (requires pyarrow)
def arrow_batch(self, batch_size: int = 8192) -> pyarrow.RecordBatch: ...
def arrow(self, batch_size: int = 8192) -> pyarrow.Table: ...
def arrow_reader(self, batch_size: int = 8192) -> pyarrow.RecordBatchReader: ...

# DB-API 2.0 Connection Object
# https://www.python.org/dev/peps/pep-0249/#connection-objects
class Connection:
Expand Down
Loading