diff --git a/features/modernization-phase4.feature b/features/modernization-phase4.feature new file mode 100644 index 000000000..dab67a31d --- /dev/null +++ b/features/modernization-phase4.feature @@ -0,0 +1,42 @@ +Feature: Modernization Phase 4 — shape-tree ergonomics + In order to traverse, look up, and inspect shapes ergonomically + As a developer using python-pptx + I need iter_leaf_shapes, mapping-like name access, find_by_xpath, and selection-pane ordering + + + Scenario: Mapping-like name access on a slide's shapes + Given a fresh slide with a title placeholder + Then shapes["Title 1"] returns the title shape + And "Title 1" is in shapes + And "Bogus" is not in shapes + + + Scenario: Mapping-like name access on a slide's placeholders + Given a fresh slide with a title placeholder + Then placeholders["Title 1"] returns the title placeholder + And "Title 1" is in placeholders + + + Scenario: shapes.keys() returns the list of shape names + Given a fresh slide with a title placeholder + Then shapes.keys() includes "Title 1" + + + Scenario: in_selection_pane_order reverses XML order + Given a fresh slide with a title placeholder + Then shapes.in_selection_pane_order() reverses iteration order + + + Scenario: iter_leaf_shapes yields top-level shapes when no groups present + Given a fresh slide with a title placeholder + Then iter_leaf_shapes() yields the same shapes as iteration + + + Scenario: find_by_xpath returns a non-empty list for a known element + Given a fresh slide with a title placeholder + Then title.find_by_xpath(".//p:nvSpPr") has length 1 + + + Scenario: find_by_xpath returns empty list on no match + Given a fresh slide with a title placeholder + Then title.find_by_xpath(".//a:nope_no_match") is empty diff --git a/features/steps/modernization_phase4.py b/features/steps/modernization_phase4.py new file mode 100644 index 000000000..a46e45cdf --- /dev/null +++ b/features/steps/modernization_phase4.py @@ -0,0 +1,71 @@ +"""Gherkin steps for Modernization 
Phase 4 (issue #29) — shape-tree ergonomics.""" + +from __future__ import annotations + +from behave import then + + +# the "Given a fresh slide with a title placeholder" step is shared from +# features/steps/modernization_phase2.py (Phase 2) + + +# then ==================================================== + + +@then('shapes["Title 1"] returns the title shape') +def then_shapes_str_key_returns_title(context): + sh = context.slide.shapes["Title 1"] + assert sh.name == "Title 1", sh.name + + +@then('"Title 1" is in shapes') +def then_title_in_shapes(context): + assert "Title 1" in context.slide.shapes + + +@then('"Bogus" is not in shapes') +def then_bogus_not_in_shapes(context): + assert "Bogus" not in context.slide.shapes + + +@then('placeholders["Title 1"] returns the title placeholder') +def then_placeholders_str_key(context): + ph = context.slide.placeholders["Title 1"] + assert ph.name == "Title 1", ph.name + + +@then('"Title 1" is in placeholders') +def then_title_in_placeholders(context): + assert "Title 1" in context.slide.placeholders + + +@then('shapes.keys() includes "Title 1"') +def then_shapes_keys_includes_title(context): + assert "Title 1" in context.slide.shapes.keys() + + +@then("shapes.in_selection_pane_order() reverses iteration order") +def then_selection_pane_reverses(context): + xml_order = [s.name for s in context.slide.shapes] + sp_order = [s.name for s in context.slide.shapes.in_selection_pane_order()] + assert sp_order == list(reversed(xml_order)), (sp_order, xml_order) + + +@then("iter_leaf_shapes() yields the same shapes as iteration") +def then_iter_leaf_matches_iter(context): + leaves = [s.name for s in context.slide.shapes.iter_leaf_shapes()] + top = [s.name for s in context.slide.shapes] + assert leaves == top, (leaves, top) + + +@then('title.find_by_xpath(".//p:nvSpPr") has length 1') +def then_xpath_match_length_1(context): + title = context.slide.shapes.title + results = title.find_by_xpath(".//p:nvSpPr") + assert len(results) == 1, 
len(results) + + +@then('title.find_by_xpath(".//a:nope_no_match") is empty') +def then_xpath_empty(context): + title = context.slide.shapes.title + assert title.find_by_xpath(".//a:nope_no_match") == [] diff --git a/src/pptx/shapes/base.py b/src/pptx/shapes/base.py index 4af96dea7..bcf687290 100644 --- a/src/pptx/shapes/base.py +++ b/src/pptx/shapes/base.py @@ -4,11 +4,19 @@ from typing import TYPE_CHECKING, cast +from lxml.etree import _Element # pyright: ignore[reportPrivateUsage] + from pptx.action import ActionSetting from pptx.dml.effect import ShadowFormat from pptx.shared import ElementProxy from pptx.util import lazyproperty +# ---bound to the lxml base method so `find_by_xpath(..., namespaces=ns)` can +# ---honor the caller's prefix map without going through the project's +# ---`BaseOxmlElement.xpath` override (which auto-applies the project nsmap +# ---and rejects `namespaces=` kwarg). +_LXML_XPATH = _Element.xpath + if TYPE_CHECKING: from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER from pptx.oxml.shapes import ShapeElement @@ -65,6 +73,27 @@ def element(self) -> ShapeElement: """ return self._element + def find_by_xpath(self, xpath: str, namespaces: "dict[str, str] | None" = None) -> list: + """Power-user XPath escape hatch over this shape's element subtree. + + Returns whatever ``lxml.etree._Element.xpath`` returns — typically a + list of matching elements, or an empty list when the expression + matches nothing. When ``namespaces`` is |None| (default), the + project's standard namespace map is used so common prefixes + (``a:``, ``p:``, ``r:``, ``xsi:``, ``adec:``, ``p14:``, etc.) work + without explicit declaration. Pass a custom dict to override. 
+ + Example:: + + for t_elm in shape.find_by_xpath(".//a:t"): + print(t_elm.text) + """ + if namespaces is None: + # ---project's BaseOxmlElement.xpath auto-applies the standard nsmap--- + return self._element.xpath(xpath) + # ---custom nsmap: bypass the project wrapper (see _LXML_XPATH note above)--- + return _LXML_XPATH(self._element, xpath, namespaces=namespaces) + @property def has_chart(self) -> bool: """|True| if this shape is a graphic frame containing a chart object. diff --git a/src/pptx/shapes/shapetree.py b/src/pptx/shapes/shapetree.py index 6b5f2f3ad..4a8bbb318 100644 --- a/src/pptx/shapes/shapetree.py +++ b/src/pptx/shapes/shapetree.py @@ -85,11 +85,28 @@ def __init__(self, spTree: CT_GroupShape, parent: ProvidesPart): self._spTree = spTree self._cached_max_shape_id = None - def __getitem__(self, idx: int) -> BaseShape: - """Return shape at `idx` in sequence, e.g. `shapes[2]`.""" + def __getitem__(self, key: int | str) -> BaseShape: + """Return shape at `key`. Mapping-like dispatch by key type. + + - Integer ``key`` returns the shape at that index in document + order, e.g. ``shapes[2]``. Raises |IndexError| if out of range. + - String ``key`` returns the shape whose ``.name`` equals ``key`` + (the same lookup as :meth:`by_name`), e.g. ``shapes["Title 1"]``. + Raises |KeyError| with a clear message on miss. + + ``bool`` keys are rejected (|TypeError|) — they're a subclass of + ``int`` so would otherwise silently resolve to index 0/1, which + is almost certainly an unintended call. + + Closes scanny/python-pptx#800. 
+        """
+        if isinstance(key, bool):
+            raise TypeError("shape key must be int or str, got bool")
+        if isinstance(key, str):
+            return self.by_name(key)
         shape_elms = list(self._iter_member_elms())
         try:
-            shape_elm = shape_elms[idx]
+            shape_elm = shape_elms[key]
         except IndexError:
             raise IndexError("shape index out of range")
         return self._shape_factory(shape_elm)
@@ -125,6 +142,67 @@ def by_name(self, name: str) -> BaseShape:
                 return shape
         raise KeyError("no shape named %r in this collection" % name)
 
+    def __contains__(self, key: object) -> bool:
+        """Mapping-like membership: `"Title 1" in shapes` checks names.
+
+        - String key: True when any shape in this collection has a matching
+          ``.name`` (case-sensitive).
+        - Integer key: True when ``shapes[key]`` is in range (negatives too).
+        - ``bool`` key: always False, mirroring the `__getitem__` rejection.
+        - Any other key (e.g. a shape object): True when it equals a member,
+          exactly as the default iteration-based ``in`` test would decide.
+        """
+        if isinstance(key, bool):
+            return False
+        if isinstance(key, str):
+            return any(shape.name == key for shape in self)
+        if isinstance(key, int):
+            count = len(self)
+            return -count <= key < count
+        # ---defining `__contains__` disables Python's iteration fallback---
+        return any(key is shape or key == shape for shape in self)
+
+    def keys(self) -> list[str]:
+        """List of every shape's ``.name`` in document order.
+
+        Mapping-like helper. Names may not be unique (PowerPoint doesn't
+        enforce); duplicates appear in iteration order.
+        """
+        return [shape.name for shape in self]
+
+    def iter_leaf_shapes(self) -> Iterator[BaseShape]:
+        """Recursively yield every non-group shape in this collection.
+
+        Descends into `GroupShape` children; the group containers themselves
+        are NOT yielded — only the leaf shapes (autoshapes, pictures,
+        connectors, text frames, tables, charts, placeholders, etc.) inside
+        them. A consumer wanting the group containers should use the
+        regular `for shape in shapes` iteration.
+
+        Closes scanny/python-pptx#435.
+ """ + # ---deferred import to avoid circular dependency--- + from pptx.shapes.group import GroupShape + + for shape in self: + if isinstance(shape, GroupShape): + yield from shape.shapes.iter_leaf_shapes() + else: + yield shape + + def in_selection_pane_order(self) -> tuple[BaseShape, ...]: + """Return shapes in PowerPoint's Selection Pane order. + + The Selection Pane lists shapes from top-most (most recently drawn, + rendered on top) to bottom-most. Top-most in PowerPoint is the + last child in XML document order, so this is the reverse of + ``tuple(self)``. Read-only snapshot — does not auto-update if + the collection changes after the call. + + Closes scanny/python-pptx#532. + """ + return tuple(reversed(list(self))) + def clone_placeholder(self, placeholder: LayoutPlaceholder) -> None: """Add a new placeholder shape based on `placeholder`.""" sp = placeholder.element @@ -859,22 +937,56 @@ def _shape_factory( # pyright: ignore[reportIncompatibleMethodOverride] class SlidePlaceholders(ParentedElementProxy): """Collection of placeholder shapes on a slide. - Supports iteration, :func:`len`, and dictionary-style lookup on the `idx` value of the - placeholders it contains. + Supports iteration, :func:`len`, and dictionary-style lookup by both the + `idx` value (int) and the placeholder ``.name`` (str). """ _element: CT_GroupShape - def __getitem__(self, idx: int): - """Access placeholder shape having `idx`. + def __getitem__(self, key: int | str): + """Access placeholder shape by `idx` value (int) or `.name` (str). + + Note that while this looks like list access, integer ``key`` is a + dictionary key against the placeholder's ``ph_idx`` (NOT a sequence + index) and will raise |KeyError| if no placeholder with that idx + is in the collection. String ``key`` looks up by ``.name`` and + raises |KeyError| on miss. 
``bool`` keys are rejected (|TypeError|)
+        — they're a subclass of ``int`` so would otherwise silently resolve
+        to a `ph_idx == 0/1` lookup, almost certainly unintended.
 
-        Note that while this looks like list access, idx is actually a dictionary key and will
-        raise |KeyError| if no placeholder with that idx value is in the collection.
+        Closes scanny/python-pptx#800.
         """
+        if isinstance(key, bool):
+            raise TypeError("placeholder key must be int or str, got bool")
+        if isinstance(key, str):
+            for ph in self:
+                if ph.name == key:
+                    return ph
+            raise KeyError("no placeholder named %r in this collection" % key)
         for e in self._element.iter_ph_elms():
-            if e.ph_idx == idx:
+            if e.ph_idx == key:
                 return SlideShapeFactory(e, self)
-        raise KeyError("no placeholder on this slide with idx == %d" % idx)
+        raise KeyError("no placeholder on this slide with idx == %r" % (key,))
+
+    def __contains__(self, key: object) -> bool:
+        """Mapping-like membership: `"Title 1" in placeholders` checks names.
+
+        - String key: True when any placeholder's ``.name`` matches.
+        - Integer key: True when a placeholder with that ``ph_idx`` exists.
+        - ``bool`` key: always False, mirroring the `__getitem__` rejection.
+        - Any other key: True when it equals a member (default ``in`` rule).
+        """
+        if isinstance(key, bool):
+            return False
+        if isinstance(key, str):
+            return any(ph.name == key for ph in self)
+        if isinstance(key, int):
+            return any(e.ph_idx == key for e in self._element.iter_ph_elms())
+        return any(key is ph or key == ph for ph in self)
+
+    def keys(self) -> list[str]:
+        """List of every placeholder's ``.name`` in iteration order."""
+        return [ph.name for ph in self]
 
     def __iter__(self):
         """Generate placeholder shapes in `idx` order."""
diff --git a/tests/test_modernization_phase4.py b/tests/test_modernization_phase4.py
new file mode 100644
index 000000000..4b1dd58a4
--- /dev/null
+++ b/tests/test_modernization_phase4.py
@@ -0,0 +1,245 @@
+# pyright: reportPrivateUsage=false
+
+"""Unit-test suite for Modernization Phase 4 — shape-tree ergonomics.
+ +Covers: + +- ``_BaseShapes.iter_leaf_shapes()`` (closes scanny/python-pptx#435): + recursive traversal that descends into ``GroupShape`` children, yielding + only non-group leaf shapes. +- ``_BaseShapes.__getitem__(str)`` and ``SlidePlaceholders.__getitem__(str)`` + (closes scanny/python-pptx#800): Mapping-like name access. Plus + ``__contains__`` and ``keys()`` helpers on both collections. +- ``BaseShape.find_by_xpath(xpath, namespaces=None)``: power-user XPath + escape hatch over a shape's element subtree. +- ``_BaseShapes.in_selection_pane_order()`` (closes scanny/python-pptx#532): + return shapes in PowerPoint's Selection Pane order (reverse XML / z-order). +- Anti-criteria: integer ``__getitem__`` keeps existing behavior; iter + semantics unchanged. + +Issue: https://github.com/MHoroszowski/python-pptx/issues/29 (Phase 4). +""" + +from __future__ import annotations + +import pytest + +from pptx import Presentation +from pptx.shapes.group import GroupShape +from pptx.util import Inches + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +def _make_slide_with_two_placeholders(): + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[1]) + return prs, slide + + +@pytest.fixture +def slide_fixture(): + _, slide = _make_slide_with_two_placeholders() + return slide + + +@pytest.fixture +def slide_with_extra_textboxes(): + _, slide = _make_slide_with_two_placeholders() + # ---add three textboxes so we have 5 shapes total--- + for i in range(3): + tb = slide.shapes.add_textbox(Inches(1 + i), Inches(3), Inches(1), Inches(0.5)) + tb.name = "Box %d" % (i + 1) + return slide + + +# --------------------------------------------------------------------------- +# iter_leaf_shapes (scanny#435) +# --------------------------------------------------------------------------- + + +class DescribeShapes_iter_leaf_shapes(object): + 
"""Unit-test suite for `_BaseShapes.iter_leaf_shapes`.""" + + def it_returns_an_iterator(self, slide_fixture): + result = slide_fixture.shapes.iter_leaf_shapes() + # ---generator object satisfies the Iterator protocol--- + assert hasattr(result, "__next__") + assert hasattr(result, "__iter__") + + def it_yields_top_level_shapes_when_no_groups_present(self, slide_fixture): + leaves = list(slide_fixture.shapes.iter_leaf_shapes()) + top_level = list(slide_fixture.shapes) + assert len(leaves) == len(top_level) + assert [s.name for s in leaves] == [s.name for s in top_level] + + def it_excludes_GroupShape_instances(self, slide_with_extra_textboxes): + for s in slide_with_extra_textboxes.shapes.iter_leaf_shapes(): + assert not isinstance(s, GroupShape) + + def it_yields_5_shapes_for_a_slide_with_5_top_level_shapes(self, slide_with_extra_textboxes): + leaves = list(slide_with_extra_textboxes.shapes.iter_leaf_shapes()) + assert len(leaves) == 5 + + +# --------------------------------------------------------------------------- +# Mapping-like access (scanny#800) +# --------------------------------------------------------------------------- + + +class DescribeShapes_MappingAccess(object): + """Unit-test suite for `__getitem__(str)`, `__contains__`, `keys()`.""" + + def it_returns_shape_by_name_via_string_key(self, slide_fixture): + title = slide_fixture.shapes["Title 1"] + assert title.name == "Title 1" + + def it_raises_KeyError_on_unknown_name(self, slide_fixture): + with pytest.raises(KeyError): + slide_fixture.shapes["Bogus"] + + def it_keeps_integer_indexing_unchanged(self, slide_fixture): + first = slide_fixture.shapes[0] + # ---first shape on the Title+Content layout is the title placeholder--- + assert first.name == "Title 1" + + def it_raises_IndexError_on_int_out_of_range(self, slide_fixture): + with pytest.raises(IndexError): + slide_fixture.shapes[99] + + def it_supports_string_membership_check(self, slide_fixture): + assert "Title 1" in slide_fixture.shapes + 
assert "Bogus" not in slide_fixture.shapes + + def it_supports_integer_index_range_membership(self, slide_fixture): + assert 0 in slide_fixture.shapes + assert 99 not in slide_fixture.shapes + + def it_returns_False_for_other_key_types(self, slide_fixture): + assert (1.5 in slide_fixture.shapes) is False + assert (None in slide_fixture.shapes) is False + + def it_rejects_bool_keys_explicitly(self, slide_fixture): + # ---bool subclasses int; without this guard `shapes[True]` would + # ---silently resolve to index 1, almost always a bug. + with pytest.raises(TypeError): + slide_fixture.shapes[True] + with pytest.raises(TypeError): + slide_fixture.shapes[False] + # ---and `__contains__` should not match either way--- + assert (True in slide_fixture.shapes) is False + assert (False in slide_fixture.shapes) is False + + def it_lists_names_via_keys(self, slide_fixture): + names = slide_fixture.shapes.keys() + assert names == ["Title 1", "Content Placeholder 2"] + + +class DescribeSlidePlaceholders_MappingAccess(object): + """Unit-test suite for `SlidePlaceholders.__getitem__(str)` and friends.""" + + def it_returns_placeholder_by_name(self, slide_fixture): + title = slide_fixture.placeholders["Title 1"] + assert title.name == "Title 1" + + def it_raises_KeyError_on_unknown_name(self, slide_fixture): + with pytest.raises(KeyError): + slide_fixture.placeholders["Bogus"] + + def it_keeps_integer_idx_lookup_unchanged(self, slide_fixture): + # ---placeholder idx 0 = Title--- + title = slide_fixture.placeholders[0] + assert title.name == "Title 1" + + def it_raises_KeyError_on_unknown_idx(self, slide_fixture): + with pytest.raises(KeyError): + slide_fixture.placeholders[999] + + def it_supports_string_and_int_membership(self, slide_fixture): + assert "Title 1" in slide_fixture.placeholders + assert "Bogus" not in slide_fixture.placeholders + assert 0 in slide_fixture.placeholders + assert 999 not in slide_fixture.placeholders + + def it_lists_names_via_keys(self, 
slide_fixture): + names = slide_fixture.placeholders.keys() + assert "Title 1" in names + + +# --------------------------------------------------------------------------- +# find_by_xpath +# --------------------------------------------------------------------------- + + +class DescribeShape_find_by_xpath(object): + """Unit-test suite for `BaseShape.find_by_xpath`.""" + + def it_returns_matching_elements_with_default_nsmap(self, slide_fixture): + title = slide_fixture.shapes.title + results = title.find_by_xpath(".//p:nvSpPr") + assert len(results) == 1 + + def it_returns_empty_list_on_no_match(self, slide_fixture): + title = slide_fixture.shapes.title + assert title.find_by_xpath(".//a:nope_does_not_exist") == [] + + def it_accepts_a_custom_namespace_dict(self, slide_fixture): + title = slide_fixture.shapes.title + custom = title.find_by_xpath( + ".//foo:nvSpPr", + namespaces={"foo": "http://schemas.openxmlformats.org/presentationml/2006/main"}, + ) + assert len(custom) == 1 + + +# --------------------------------------------------------------------------- +# in_selection_pane_order (scanny#532) +# --------------------------------------------------------------------------- + + +class DescribeShapes_in_selection_pane_order(object): + """Unit-test suite for `_BaseShapes.in_selection_pane_order`.""" + + def it_returns_a_tuple(self, slide_fixture): + result = slide_fixture.shapes.in_selection_pane_order() + assert isinstance(result, tuple) + + def it_reverses_the_xml_document_order(self, slide_fixture): + xml_order = [s.name for s in slide_fixture.shapes] + sp_order = [s.name for s in slide_fixture.shapes.in_selection_pane_order()] + assert sp_order == list(reversed(xml_order)) + + def it_preserves_length(self, slide_with_extra_textboxes): + assert len(slide_with_extra_textboxes.shapes.in_selection_pane_order()) == len( + slide_with_extra_textboxes.shapes + ) + + def it_does_not_mutate_the_collection(self, slide_with_extra_textboxes): + before = [s.name for s in 
slide_with_extra_textboxes.shapes] + _ = slide_with_extra_textboxes.shapes.in_selection_pane_order() + after = [s.name for s in slide_with_extra_textboxes.shapes] + assert before == after + + +# --------------------------------------------------------------------------- +# Anti / Regression +# --------------------------------------------------------------------------- + + +class DescribePhase4_Regression(object): + """Anti-criteria — existing surfaces unchanged.""" + + def it_keeps_phase2_by_name_working(self, slide_fixture): + title = slide_fixture.shapes.by_name("Title 1") + assert title.name == "Title 1" + + def it_keeps_iteration_yielding_shape_values(self, slide_fixture): + # ---if we'd switched to Mapping ABC, __iter__ would yield keys--- + for s in slide_fixture.shapes: + assert hasattr(s, "name") + assert hasattr(s, "shape_id") + + def it_keeps_len_returning_shape_count(self, slide_fixture): + assert len(slide_fixture.shapes) == 2 # Title + Content placeholders