diff --git a/cluster-info.txt b/cluster-info.txt new file mode 100644 index 000000000..000835b5d --- /dev/null +++ b/cluster-info.txt @@ -0,0 +1,2 @@ +- `78be9c1` Merge pull request #21285 from gooddata/dho/cq-104-arrow-api +- `81eb97a` Merge pull request #21382 from gooddata/dho/cq-104-arrow-api-2 \ No newline at end of file diff --git a/cluster.json b/cluster.json new file mode 100644 index 000000000..6845ed6c5 --- /dev/null +++ b/cluster.json @@ -0,0 +1,24 @@ +{ + "id": "C010", + "title": "Add binary execution result endpoint for Apache Arrow format", + "services": [ + "gooddata-afm-client" + ], + "commits": [ + { + "sha": "78be9c10f3b9ec597f6282014708415e3527a3df", + "author": "Dan Homola", + "author_email": "dan.homola@gooddata.com", + "message": "Merge pull request #21285 from gooddata/dho/cq-104-arrow-api" + }, + { + "sha": "81eb97a771c9c18c47c508df3970ccab19e28108", + "author": "Dan Homola", + "author_email": "dan.homola@gooddata.com", + "message": "Merge pull request #21382 from gooddata/dho/cq-104-arrow-api-2" + } + ], + "diff": "--- a/gooddata-afm-client.json\n+++ b/gooddata-afm-client.json\n+ \"/api/v1/actions/workspaces/{workspaceId}/execution/afm/execute/result/{resultId}/binary\": {\n+ \"get\": {\n+ \"description\": \"(BETA) Gets a single execution result as an Apache Arrow IPC File or Stream format.\",\n+ \"operationId\": \"retrieveResultBinary\",\n+ \"responses\": {\n+ \"200\": {\n+ \"content\": {\n+ \"application/vnd.apache.arrow.file\": { \"schema\": { \"format\": \"binary\", \"type\": \"string\" } },\n+ \"application/vnd.apache.arrow.stream\": { \"schema\": { \"format\": \"binary\", \"type\": \"string\" } }\n+ },\n+ \"description\": \"Execution result was found and returned.\"\n+ }\n+ },\n+ \"summary\": \"(BETA) Get a single execution result in Apache Arrow File or Stream format\",\n+ \"x-gdc-security-info\": { \"permissions\": [\"VIEW\"] }\n+ }\n+ }", + "jira_tickets": [], + "sdk_impact": "new_feature" +} \ No newline at end of file diff --git a/packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py b/packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py index a81b807ac..dd3585ca7 100644 --- a/packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py +++ b/packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py @@ -372,6 +372,30 @@ def read_result( ) return ExecutionResult(execution_result) + def read_result_binary( + self, + timeout: Union[int, float, tuple] | None = None, + ) -> bytes: + """ + Reads the execution result in Apache Arrow binary format (IPC File or Stream). + + Args: + timeout: request timeout in seconds. If a tuple is provided, it is used as + (connection timeout, read timeout). + + Returns: + bytes: The execution result serialized as Apache Arrow IPC binary data. + """ + response = self._actions_api.retrieve_result_binary( + workspace_id=self._workspace_id, + result_id=self.result_id, + _check_return_type=False, + _preload_content=False, + _request_timeout=timeout, + **({"x_gdc_cancel_token": self.cancel_token} if self.cancel_token else {}), + ) + return response.data + def cancel(self) -> None: """ Cancels the execution backing this execution result. @@ -464,6 +488,22 @@ def read_result( ) -> ExecutionResult: return self.bare_exec_response.read_result(limit, offset, timeout) + def read_result_binary( + self, + timeout: Union[int, float, tuple] | None = None, + ) -> bytes: + """ + Reads the execution result in Apache Arrow binary format (IPC File or Stream). + + Args: + timeout: request timeout in seconds. If a tuple is provided, it is used as + (connection timeout, read timeout). + + Returns: + bytes: The execution result serialized as Apache Arrow IPC binary data. + """ + return self.bare_exec_response.read_result_binary(timeout) + def cancel(self) -> None: """ Cancels the execution. diff --git a/packages/gooddata-sdk/src/gooddata_sdk/compute/service.py b/packages/gooddata-sdk/src/gooddata_sdk/compute/service.py index 6163798b9..0df31ad81 100644 --- a/packages/gooddata-sdk/src/gooddata_sdk/compute/service.py +++ b/packages/gooddata-sdk/src/gooddata_sdk/compute/service.py @@ -73,6 +73,33 @@ def for_exec_def( else None, ) + def retrieve_result_binary( + self, + workspace_id: str, + result_id: str, + timeout: Union[int, float, tuple] | None = None, + ) -> bytes: + """ + Gets a single execution result in Apache Arrow binary format (IPC File or Stream). + + Args: + workspace_id (str): workspace identifier + result_id (str): execution result ID + timeout: request timeout in seconds. If a tuple is provided, it is used as + (connection timeout, read timeout). + + Returns: + bytes: The execution result serialized as Apache Arrow IPC binary data. + """ + response = self._actions_api.retrieve_result_binary( + workspace_id=workspace_id, + result_id=result_id, + _check_return_type=False, + _preload_content=False, + _request_timeout=timeout, + ) + return response.data + def retrieve_result_cache_metadata(self, workspace_id: str, result_id: str) -> ResultCacheMetadata: """ Gets execution result's metadata from GoodData.CN workspace for given execution result ID. diff --git a/packages/gooddata-sdk/tests/compute/test_retrieve_result_binary.py b/packages/gooddata-sdk/tests/compute/test_retrieve_result_binary.py new file mode 100644 index 000000000..2fb51c6eb --- /dev/null +++ b/packages/gooddata-sdk/tests/compute/test_retrieve_result_binary.py @@ -0,0 +1,151 @@ +# (C) 2024 GoodData Corporation +from __future__ import annotations + +from unittest.mock import MagicMock + +from gooddata_sdk.compute.model.execution import BareExecutionResponse +from gooddata_sdk.compute.service import ComputeService + + +def _make_mock_execution_response(result_id: str = "test-result-id") -> MagicMock: + """Create a mock AfmExecutionResponse with a given result_id.""" + mock_response = MagicMock() + mock_response.__getitem__ = MagicMock( + side_effect=lambda key: { + "execution_response": MagicMock( + __getitem__=MagicMock( + side_effect=lambda k: { + "links": {"executionResult": result_id}, + "dimensions": [], + }[k] + ) + ) + }[key] + ) + return mock_response + + +def test_bare_execution_response_read_result_binary(): + """Test that BareExecutionResponse.read_result_binary calls retrieve_result_binary correctly.""" + fake_binary_data = b"\x00\x01\x02\x03arrow_data" + mock_http_response = MagicMock() + mock_http_response.data = fake_binary_data + + mock_actions_api = MagicMock() + mock_actions_api.retrieve_result_binary.return_value = mock_http_response + + mock_api_client = MagicMock() + mock_api_client.actions_api = mock_actions_api + + mock_exec_response = _make_mock_execution_response("result-123") + + bare = BareExecutionResponse( + api_client=mock_api_client, + workspace_id="my-workspace", + execution_response=mock_exec_response, + cancel_token=None, + ) + + result = bare.read_result_binary() + + assert result == fake_binary_data + mock_actions_api.retrieve_result_binary.assert_called_once_with( + workspace_id="my-workspace", + result_id="result-123", + _check_return_type=False, + _preload_content=False, + _request_timeout=None, + ) + + +def test_bare_execution_response_read_result_binary_with_cancel_token(): + """Test that BareExecutionResponse.read_result_binary forwards cancel token.""" + fake_binary_data = b"arrow_stream_data" + mock_http_response = MagicMock() + mock_http_response.data = fake_binary_data + + mock_actions_api = MagicMock() + mock_actions_api.retrieve_result_binary.return_value = mock_http_response + + mock_api_client = MagicMock() + mock_api_client.actions_api = mock_actions_api + + mock_exec_response = _make_mock_execution_response("result-456") + + bare = BareExecutionResponse( + api_client=mock_api_client, + workspace_id="my-workspace", + execution_response=mock_exec_response, + cancel_token="cancel-token-xyz", + ) + + result = bare.read_result_binary(timeout=30) + + assert result == fake_binary_data + mock_actions_api.retrieve_result_binary.assert_called_once_with( + workspace_id="my-workspace", + result_id="result-456", + _check_return_type=False, + _preload_content=False, + _request_timeout=30, + x_gdc_cancel_token="cancel-token-xyz", + ) + + +def test_compute_service_retrieve_result_binary(): + """Test that ComputeService.retrieve_result_binary calls the API correctly.""" + fake_binary_data = b"apache_arrow_file_data" + mock_http_response = MagicMock() + mock_http_response.data = fake_binary_data + + mock_actions_api = MagicMock() + mock_actions_api.retrieve_result_binary.return_value = mock_http_response + + mock_api_client = MagicMock() + mock_api_client.actions_api = mock_actions_api + + service = ComputeService(api_client=mock_api_client) + + result = service.retrieve_result_binary( + workspace_id="workspace-1", + result_id="result-789", + ) + + assert result == fake_binary_data + mock_actions_api.retrieve_result_binary.assert_called_once_with( + workspace_id="workspace-1", + result_id="result-789", + _check_return_type=False, + _preload_content=False, + _request_timeout=None, + ) + + +def test_compute_service_retrieve_result_binary_with_timeout(): + """Test that ComputeService.retrieve_result_binary forwards timeout correctly.""" + fake_binary_data = b"apache_arrow_stream_data" + mock_http_response = MagicMock() + mock_http_response.data = fake_binary_data + + mock_actions_api = MagicMock() + mock_actions_api.retrieve_result_binary.return_value = mock_http_response + + mock_api_client = MagicMock() + mock_api_client.actions_api = mock_actions_api + + service = ComputeService(api_client=mock_api_client) + + result = service.retrieve_result_binary( + workspace_id="workspace-2", + result_id="result-abc", + timeout=(5, 60), + ) + + assert result == fake_binary_data + mock_actions_api.retrieve_result_binary.assert_called_once_with( + workspace_id="workspace-2", + result_id="result-abc", + _check_return_type=False, + _preload_content=False, + _request_timeout=(5, 60), + ) diff --git a/pyproject.toml b/pyproject.toml index 649a8d72b..35688edf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,8 @@ dependencies = [ "gooddata-api-client", "gooddata-pipelines", "tests-support", + "pytest>=8.3.5", + "vcrpy>=8.0.0", ] [tool.uv] diff --git a/result.json b/result.json new file mode 100644 index 000000000..d6e1954c3 --- /dev/null +++ b/result.json @@ -0,0 +1,12 @@ +{ + "status": "implemented", + "cluster_id": "C010", + "summary": "Added SDK wrapper methods for the new binary execution result endpoint (GET /api/v1/actions/workspaces/{workspaceId}/execution/afm/execute/result/{resultId}/binary). \n\nThree new methods were added:\n1. `BareExecutionResponse.read_result_binary(timeout)` in `execution.py` — calls `retrieve_result_binary` with `_preload_content=False` and returns raw `bytes` (Apache Arrow IPC binary data). Forwards cancel token when present.\n2. `Execution.read_result_binary(timeout)` in `execution.py` — delegates to `bare_exec_response.read_result_binary()` for consistent API on the primary user-facing class.\n3. `ComputeService.retrieve_result_binary(workspace_id, result_id, timeout)` in `service.py` — standalone method for retrieving binary results directly when only workspace_id and result_id are available.\n\nAll methods use `_check_return_type=False` and `_preload_content=False` following SDK conventions, and return `bytes`. Four unit tests were added to verify parameter forwarding including cancel token and timeout handling.", + "files_changed": [ + "packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py", + "packages/gooddata-sdk/src/gooddata_sdk/compute/service.py", + "packages/gooddata-sdk/tests/compute/test_retrieve_result_binary.py" + ], + "reason": "", + "cost_usd": 1.1491305 +} \ No newline at end of file diff --git a/uv.lock b/uv.lock index 36b749a54..0496f3cfc 100644 --- a/uv.lock +++ b/uv.lock @@ -1020,7 +1020,9 @@ dependencies = [ { name = "gooddata-pandas" }, { name = "gooddata-pipelines" }, { name = "gooddata-sdk" }, + { name = "pytest" }, { name = "tests-support" }, + { name = "vcrpy" }, ] [package.dev-dependencies] @@ -1070,7 +1072,9 @@ requires-dist = [ { name = "gooddata-pandas", editable = "packages/gooddata-pandas" }, { name = "gooddata-pipelines", editable = "packages/gooddata-pipelines" }, { name = "gooddata-sdk", editable = "packages/gooddata-sdk" }, + { name = "pytest", specifier = ">=8.3.5" }, { name = "tests-support", editable = "packages/tests-support" }, + { name = "vcrpy", specifier = ">=8.0.0" }, ] [package.metadata.requires-dev]