Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d3bf48c
feat: add callback parameter to _read_gbq_colab
shuoweil May 14, 2026
4258639
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 15, 2026
7574f22
feat: support execution history filtering by events and job_ids
shuoweil May 15, 2026
eccd236
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 15, 2026
d2eea39
feat: support automatic per-cell execution history filtering
shuoweil May 15, 2026
21d5dd8
format code
shuoweil May 15, 2026
97c7260
fix: avoid mutating extra_query_labels in place
shuoweil May 16, 2026
f080dda
format code
shuoweil May 16, 2026
4bd2735
fix: update pre-commit config to use ruff
shuoweil May 16, 2026
9dd03ed
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 18, 2026
dcae615
fix mypy
shuoweil May 18, 2026
eb548b6
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 18, 2026
7db1330
feat: universal per-cell execution history filtering
shuoweil May 18, 2026
da6682c
format code
shuoweil May 18, 2026
0ef9216
revert .pre-commit-config.yaml to old black/flake8 config
shuoweil May 18, 2026
66fe150
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 20, 2026
b9c9336
feat: propagate cell execution count to widget queries and rename his…
shuoweil May 20, 2026
5f9d824
refactor: deduplicate ipython execution count
shuoweil May 20, 2026
d9a5593
feat: change default of execution history all_cells to True
shuoweil May 20, 2026
0c8f8c7
fix IPython mocking in unit tests
shuoweil May 20, 2026
81bf807
Merge main
shuoweil May 21, 2026
409a2fb
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 22, 2026
f158662
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 26, 2026
eb3934b
address pr review comments on execution history
shuoweil May 26, 2026
ee477b8
format code
shuoweil May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/bigframes/bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ def to_pandas_batches(
page_size: Optional[int] = None,
max_results: Optional[int] = None,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> PandasBatches:
"""Download results one message at a time.

Expand All @@ -713,6 +714,7 @@ def to_pandas_batches(
execution_spec.ExecutionSpec(
promise_under_10gb=under_10gb,
ordered=True,
cell_execution_count=cell_execution_count,
),
)
result_batches = execution_result.batches()
Expand Down
15 changes: 14 additions & 1 deletion packages/bigframes/bigframes/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import datetime
import threading
import uuid
from typing import Any, Callable, Literal, Set
from typing import Any, Callable, Literal, Optional, Set

import google.cloud.bigquery._job_helpers
import google.cloud.bigquery.job.query
Expand Down Expand Up @@ -127,8 +127,21 @@ class Event:

@dataclasses.dataclass(frozen=True)
class EventEnvelope:
"""An envelope that wraps an execution event with metadata and display options.

Attributes:
event:
The actual execution event details (e.g., ExecutionStarted, BigQuerySentEvent).
progress_bar:
Specifies the style of progress bar to display during execution.
cell_execution_count:
The 1-indexed execution count of the notebook cell that triggered the event.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it job count? Please be explicit.

Used to group and filter execution history on a per-cell basis.
"""

event: Event
progress_bar: ProgressBarType = _DEFAULT
cell_execution_count: Optional[int] = None
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't figure out what does it mean by just reading the params. Add doc strings to explain this and other parameters.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a clear, comprehensive docstring to the EventEnvelope class in packages/bigframes/bigframes/core/events.py. It now explicitly documents what the wrapper does and describes each parameter, including how cell_execution_count is captured and used to filter and scope execution history on a per-cell basis.



@dataclasses.dataclass(frozen=True)
Expand Down
18 changes: 13 additions & 5 deletions packages/bigframes/bigframes/core/global_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import threading
import traceback
import warnings
from typing import TYPE_CHECKING, Callable, Optional, TypeVar
from typing import TYPE_CHECKING, Callable, Iterable, Optional, TypeVar

import google.auth.exceptions

Expand Down Expand Up @@ -124,12 +124,20 @@ def with_default_session(func_: Callable[..., _T], *args, **kwargs) -> _T:
return func_(get_global_session(), *args, **kwargs)


def execution_history() -> "bigframes.session._ExecutionHistory":
import pandas # noqa: F401

def execution_history(
*,
events: Optional[Iterable[bigframes.core.events.Event]] = None,
job_ids: Optional[Iterable[str]] = None,
all_cells: bool = True,
) -> "bigframes.session._ExecutionHistory":
import bigframes.session

return with_default_session(bigframes.session.Session.execution_history)
return with_default_session(
bigframes.session.Session.execution_history,
events=events,
job_ids=job_ids,
all_cells=all_cells,
)


class _GlobalSessionContext:
Expand Down
13 changes: 13 additions & 0 deletions packages/bigframes/bigframes/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,16 @@ def timedelta_to_micros(
) * 1_000_000 + timedelta.microseconds

raise TypeError(f"Unrecognized input type: {type(timedelta)}")


def get_ipython_execution_count() -> typing.Optional[int]:
"""Returns the current IPython cell execution count if running in a notebook, else None."""
try:
import IPython

ipy = IPython.get_ipython()
if ipy is not None and hasattr(ipy, "execution_count"):
return ipy.execution_count
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't rely on monkey patching, may be consider some existing settings or properties.

except (ImportError, NameError):
pass
return None
4 changes: 4 additions & 0 deletions packages/bigframes/bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,7 @@ def to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> blocks.PandasBatches:
"""Stream DataFrame results to an iterable of pandas DataFrame.

Expand Down Expand Up @@ -1807,6 +1808,7 @@ def to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)

def _to_pandas_batches(
Expand All @@ -1815,11 +1817,13 @@ def _to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> blocks.PandasBatches:
return self._block.to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)

def _compute_dry_run(self) -> google.cloud.bigquery.job.QueryJob:
Expand Down
12 changes: 10 additions & 2 deletions packages/bigframes/bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):

self._dataframe = dataframe

from bigframes.core.utils import get_ipython_execution_count

self._cell_execution_count = get_ipython_execution_count()

super().__init__()

# Initialize attributes that might be needed by observers first
Expand Down Expand Up @@ -286,7 +290,10 @@ def _reset_batch_cache(self) -> None:
def _reset_batches_for_new_page_size(self) -> None:
"""Reset the batch iterator when page size changes."""
with bigframes.option_context("display.progress_bar", None):
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
self._batches = self._dataframe.to_pandas_batches(
page_size=self.page_size,
cell_execution_count=self._cell_execution_count,
)

self._reset_batch_cache()

Expand Down Expand Up @@ -318,7 +325,8 @@ def _set_table_html(self) -> None:
current_sort_state = _SortState(tuple(sort_columns), tuple(sort_ascending))
if self._last_sort_state != current_sort_state:
self._batches = df_to_display.to_pandas_batches(
page_size=self.page_size
page_size=self.page_size,
cell_execution_count=self._cell_execution_count,
)
self._reset_batch_cache()
self._last_sort_state = current_sort_state
Expand Down
14 changes: 10 additions & 4 deletions packages/bigframes/bigframes/pandas/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,23 +300,26 @@ def _try_read_gbq_colab_sessionless_dry_run(
def _read_gbq_colab( # type: ignore[overload-overlap]
query_or_table: str,
*,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[False] = ...,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = None,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: Literal[False] = False,
) -> bigframes.dataframe.DataFrame: ...


@overload
def _read_gbq_colab(
query_or_table: str,
*,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[True] = ...,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = None,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: Literal[True],
) -> pandas.Series: ...


def _read_gbq_colab(
query_or_table: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = None,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: bool = False,
) -> bigframes.dataframe.DataFrame | pandas.Series:
Expand All @@ -328,6 +331,8 @@ def _read_gbq_colab(
Args:
query_or_table (str):
SQL query or table ID (table ID not yet supported).
callback (Optional[Callable[[bigframes.core.events.EventEnvelope], None]]):
Callback to receive query execution events.
pyformat_args (Optional[Dict[str, Any]]):
Parameters to format into the query string.
dry_run (bool):
Expand Down Expand Up @@ -379,6 +384,7 @@ def _read_gbq_colab(
return global_session.with_default_session(
bigframes.session.Session._read_gbq_colab,
query_or_table,
callback=callback,
pyformat_args=pyformat_args,
dry_run=dry_run,
)
Expand Down
2 changes: 2 additions & 0 deletions packages/bigframes/bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ def to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> Iterable[pandas.Series]:
"""Stream Series results to an iterable of pandas Series.

Expand Down Expand Up @@ -812,6 +813,7 @@ def to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)
return map(lambda df: cast(pandas.Series, df.squeeze(1)), batches)

Expand Down
Loading
Loading