Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions features/modernization-phase4.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
Feature: Modernization Phase 4 — shape-tree ergonomics
In order to traverse, look up, and inspect shapes ergonomically
As a developer using python-pptx
I need iter_leaf_shapes, mapping-like name access, find_by_xpath, and selection-pane ordering


Scenario: Mapping-like name access on a slide's shapes
Given a fresh slide with a title placeholder
Then shapes["Title 1"] returns the title shape
And "Title 1" is in shapes
And "Bogus" is not in shapes


Scenario: Mapping-like name access on a slide's placeholders
Given a fresh slide with a title placeholder
Then placeholders["Title 1"] returns the title placeholder
And "Title 1" is in placeholders


Scenario: shapes.keys() returns the list of shape names
Given a fresh slide with a title placeholder
Then shapes.keys() includes "Title 1"


Scenario: in_selection_pane_order reverses XML order
Given a fresh slide with a title placeholder
Then shapes.in_selection_pane_order() reverses iteration order


Scenario: iter_leaf_shapes yields top-level shapes when no groups present
Given a fresh slide with a title placeholder
Then iter_leaf_shapes() yields the same shapes as iteration


Scenario: find_by_xpath returns a non-empty list for a known element
Given a fresh slide with a title placeholder
Then title.find_by_xpath(".//p:nvSpPr") has length 1


Scenario: find_by_xpath returns empty list on no match
Given a fresh slide with a title placeholder
Then title.find_by_xpath(".//a:nope_no_match") is empty
71 changes: 71 additions & 0 deletions features/steps/modernization_phase4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Gherkin steps for Modernization Phase 4 (issue #29) — shape-tree ergonomics."""

from __future__ import annotations

from behave import then


# the "Given a fresh slide with a title placeholder" step is shared from
# features/steps/modernization_phase2.py (Phase 2)


# then ====================================================


@then('shapes["Title 1"] returns the title shape')
def then_shapes_str_key_returns_title(context):
    """Check that string-key lookup on the slide's shapes yields the shape named "Title 1"."""
    shape = context.slide.shapes["Title 1"]
    actual_name = shape.name
    # ---the failure message carries the name that was actually found---
    assert actual_name == "Title 1", actual_name


@then('"Title 1" is in shapes')
def then_title_in_shapes(context):
    """Check that the name "Title 1" passes the `in` test on the slide's shapes."""
    shapes = context.slide.shapes
    assert "Title 1" in shapes


@then('"Bogus" is not in shapes')
def then_bogus_not_in_shapes(context):
    """Check that an unknown name fails the `in` test on the slide's shapes."""
    shapes = context.slide.shapes
    assert "Bogus" not in shapes


@then('placeholders["Title 1"] returns the title placeholder')
def then_placeholders_str_key(context):
    """Check that string-key lookup on the slide's placeholders yields the one named "Title 1"."""
    placeholder = context.slide.placeholders["Title 1"]
    actual_name = placeholder.name
    # ---the failure message carries the name that was actually found---
    assert actual_name == "Title 1", actual_name


@then('"Title 1" is in placeholders')
def then_title_in_placeholders(context):
    """Check that the name "Title 1" passes the `in` test on the slide's placeholders."""
    placeholders = context.slide.placeholders
    assert "Title 1" in placeholders


@then('shapes.keys() includes "Title 1"')
def then_shapes_keys_includes_title(context):
    """Check that `shapes.keys()` reports the name "Title 1"."""
    names = context.slide.shapes.keys()
    assert "Title 1" in names


@then("shapes.in_selection_pane_order() reverses iteration order")
def then_selection_pane_reverses(context):
    """Check that selection-pane order is the exact reverse of plain iteration order.

    Shapes are compared by name; both name sequences are reported on failure.
    """
    shapes = context.slide.shapes
    xml_order = []
    for shape in shapes:
        xml_order.append(shape.name)
    sp_order = []
    for shape in shapes.in_selection_pane_order():
        sp_order.append(shape.name)
    assert sp_order == xml_order[::-1], (sp_order, xml_order)


@then("iter_leaf_shapes() yields the same shapes as iteration")
def then_iter_leaf_matches_iter(context):
    """Check that `iter_leaf_shapes()` and plain iteration give the same names in the same order.

    Both name sequences are reported on failure.
    """
    shapes = context.slide.shapes
    leaves = []
    for shape in shapes.iter_leaf_shapes():
        leaves.append(shape.name)
    top = []
    for shape in shapes:
        top.append(shape.name)
    assert leaves == top, (leaves, top)


@then('title.find_by_xpath(".//p:nvSpPr") has length 1')
def then_xpath_match_length_1(context):
    """Check that the title shape's subtree holds exactly one `p:nvSpPr` match."""
    matches = context.slide.shapes.title.find_by_xpath(".//p:nvSpPr")
    match_count = len(matches)
    # ---the failure message carries the number of matches actually found---
    assert match_count == 1, match_count


@then('title.find_by_xpath(".//a:nope_no_match") is empty')
def then_xpath_empty(context):
    """Check that an XPath expression matching nothing yields an empty list."""
    matches = context.slide.shapes.title.find_by_xpath(".//a:nope_no_match")
    assert matches == []
29 changes: 29 additions & 0 deletions src/pptx/shapes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@

from typing import TYPE_CHECKING, cast

from lxml.etree import _Element # pyright: ignore[reportPrivateUsage]

from pptx.action import ActionSetting
from pptx.dml.effect import ShadowFormat
from pptx.shared import ElementProxy
from pptx.util import lazyproperty

# ---Reference to lxml's own `_Element.xpath`, captured once at import time.
# ---`find_by_xpath(..., namespaces=ns)` calls it directly so the caller's
# ---prefix map is honored; the project's `BaseOxmlElement.xpath` override
# ---auto-applies the project nsmap and rejects the `namespaces=` kwarg, so
# ---it cannot serve that case.
_LXML_XPATH = _Element.xpath

if TYPE_CHECKING:
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
from pptx.oxml.shapes import ShapeElement
Expand Down Expand Up @@ -65,6 +73,27 @@ def element(self) -> ShapeElement:
"""
return self._element

def find_by_xpath(self, xpath: str, namespaces: "dict[str, str] | None" = None) -> list:
    """Evaluate `xpath` against this shape's XML element and return the result.

    A low-level escape hatch for callers who need direct access to the
    shape's element subtree. The return value is whatever
    ``lxml.etree._Element.xpath`` produces — typically a list of matching
    elements, empty when nothing matches.

    `namespaces` selects the prefix map used to resolve prefixes in
    `xpath`. With the default of |None|, the project's standard namespace
    map applies, so common prefixes (``a:``, ``p:``, ``r:``, ``xsi:``,
    ``adec:``, ``p14:``, etc.) need no declaration. Supplying a dict
    replaces that map with the caller's own.

    Example::

        for t_elm in shape.find_by_xpath(".//a:t"):
            print(t_elm.text)
    """
    target = self._element
    if namespaces is not None:
        # ---caller-supplied prefix map: call lxml's method directly because
        # ---the project's xpath wrapper does not accept `namespaces=`---
        return _LXML_XPATH(target, xpath, namespaces=namespaces)
    # ---no prefix map given: the project's wrapper applies its standard nsmap---
    return target.xpath(xpath)

@property
def has_chart(self) -> bool:
"""|True| if this shape is a graphic frame containing a chart object.
Expand Down
134 changes: 123 additions & 11 deletions src/pptx/shapes/shapetree.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,28 @@ def __init__(self, spTree: CT_GroupShape, parent: ProvidesPart):
self._spTree = spTree
self._cached_max_shape_id = None

def __getitem__(self, idx: int) -> BaseShape:
"""Return shape at `idx` in sequence, e.g. `shapes[2]`."""
def __getitem__(self, key: int | str) -> BaseShape:
"""Return shape at `key`. Mapping-like dispatch by key type.

- Integer ``key`` returns the shape at that index in document
order, e.g. ``shapes[2]``. Raises |IndexError| if out of range.
- String ``key`` returns the shape whose ``.name`` equals ``key``
(the same lookup as :meth:`by_name`), e.g. ``shapes["Title 1"]``.
Raises |KeyError| with a clear message on miss.

``bool`` keys are rejected (|TypeError|) — they're a subclass of
``int`` so would otherwise silently resolve to index 0/1, which
is almost certainly an unintended call.

Closes scanny/python-pptx#800.
"""
if isinstance(key, bool):
raise TypeError("shape key must be int or str, got bool")
if isinstance(key, str):
return self.by_name(key)
shape_elms = list(self._iter_member_elms())
try:
shape_elm = shape_elms[idx]
shape_elm = shape_elms[key]
except IndexError:
raise IndexError("shape index out of range")
return self._shape_factory(shape_elm)
Expand Down Expand Up @@ -125,6 +142,67 @@ def by_name(self, name: str) -> BaseShape:
return shape
raise KeyError("no shape named %r in this collection" % name)

def __contains__(self, key: object) -> bool:
    """Membership test: `"Title 1" in shapes`, `2 in shapes`, `shape in shapes`.

    - String key: True when any shape in this collection has a matching
      ``.name`` (case-sensitive).
    - Integer key: True when ``0 <= key < len(self)`` — a sequence-style
      index range check. Negative integers are reported as absent.
    - ``bool`` key: always False. ``bool`` is a subclass of ``int``, and
      ``True``/``False`` used as an index is almost always a bug.
    - Any other key (typically a shape object): True when a member of the
      collection is the same object as, or compares equal to, `key`.

    The last case reproduces the result Python's iteration-based ``in``
    gives for a collection that defines no ``__contains__``. Without it,
    defining this method would make ``shape in shapes`` False for every
    shape object, silently breaking existing callers.
    """
    # ---bool must be tested before int because bool is an int subclass---
    if isinstance(key, bool):
        return False
    if isinstance(key, str):
        return any(shape.name == key for shape in self)
    if isinstance(key, int):
        return 0 <= key < len(self)
    # ---object membership: identity first, then equality, as the default `in` does---
    return any(member is key or member == key for member in self)

def keys(self) -> list[str]:
    """Return the ``.name`` of each shape in this collection, in iteration order.

    Mapping-like helper. Shape names are not guaranteed unique (PowerPoint
    doesn't enforce uniqueness), so the same name can appear more than once.
    """
    names = []
    for shape in self:
        names.append(shape.name)
    return names

def iter_leaf_shapes(self) -> Iterator[BaseShape]:
    """Generate every non-group shape in this collection, depth-first.

    A `GroupShape` member is never yielded itself; instead its own
    `.shapes` collection is walked recursively and the leaf shapes inside
    it (autoshapes, pictures, connectors, text frames, tables, charts,
    placeholders, etc.) are yielded at the group's position. Callers that
    want the group containers should use plain `for shape in shapes`
    iteration instead.

    Closes scanny/python-pptx#435.
    """
    # ---imported here rather than at module level to avoid a circular import---
    from pptx.shapes.group import GroupShape

    for member in self:
        if not isinstance(member, GroupShape):
            yield member
            continue
        # ---a group: emit its leaves in place of the container---
        for leaf in member.shapes.iter_leaf_shapes():
            yield leaf

def in_selection_pane_order(self) -> tuple[BaseShape, ...]:
    """Return this collection's shapes as PowerPoint's Selection Pane lists them.

    The Selection Pane runs from the top-most shape (most recently drawn,
    rendered on top) down to the bottom-most. The top-most shape is the
    last child in XML document order, so the result is ``tuple(self)``
    reversed. The tuple is a snapshot taken at call time; later changes to
    the collection are not reflected in it.

    Closes scanny/python-pptx#532.
    """
    ordered = list(self)
    ordered.reverse()
    return tuple(ordered)

def clone_placeholder(self, placeholder: LayoutPlaceholder) -> None:
"""Add a new placeholder shape based on `placeholder`."""
sp = placeholder.element
Expand Down Expand Up @@ -859,22 +937,56 @@ def _shape_factory( # pyright: ignore[reportIncompatibleMethodOverride]
class SlidePlaceholders(ParentedElementProxy):
"""Collection of placeholder shapes on a slide.

Supports iteration, :func:`len`, and dictionary-style lookup on the `idx` value of the
placeholders it contains.
Supports iteration, :func:`len`, and dictionary-style lookup by both the
`idx` value (int) and the placeholder ``.name`` (str).
"""

_element: CT_GroupShape

def __getitem__(self, idx: int):
"""Access placeholder shape having `idx`.
def __getitem__(self, key: int | str):
"""Access placeholder shape by `idx` value (int) or `.name` (str).

Note that while this looks like list access, integer ``key`` is a
dictionary key against the placeholder's ``ph_idx`` (NOT a sequence
index) and will raise |KeyError| if no placeholder with that idx
is in the collection. String ``key`` looks up by ``.name`` and
raises |KeyError| on miss. ``bool`` keys are rejected (|TypeError|)
— they're a subclass of ``int`` so would otherwise silently resolve
to a `ph_idx == 0/1` lookup, almost certainly unintended.

Note that while this looks like list access, idx is actually a dictionary key and will
raise |KeyError| if no placeholder with that idx value is in the collection.
Closes scanny/python-pptx#800.
"""
if isinstance(key, bool):
raise TypeError("placeholder key must be int or str, got bool")
if isinstance(key, str):
for ph in self:
if ph.name == key:
return ph
raise KeyError("no placeholder named %r in this collection" % key)
for e in self._element.iter_ph_elms():
if e.ph_idx == idx:
if e.ph_idx == key:
return SlideShapeFactory(e, self)
raise KeyError("no placeholder on this slide with idx == %d" % idx)
raise KeyError("no placeholder on this slide with idx == %d" % key)

def __contains__(self, key: object) -> bool:
    """Membership test: `"Title 1" in placeholders`, `0 in placeholders`, `ph in placeholders`.

    - String key: True when any placeholder's ``.name`` matches.
    - Integer key: True when a placeholder with that ``ph_idx`` exists.
    - ``bool`` key: always False. ``bool`` is a subclass of ``int``, and
      ``True``/``False`` used as an idx is almost always a bug.
    - Any other key (typically a placeholder object): True when a member
      of the collection is the same object as, or compares equal to, `key`.

    The last case reproduces the result Python's iteration-based ``in``
    gives for a collection that defines no ``__contains__``. Without it,
    defining this method would make ``placeholder in placeholders`` False
    for every placeholder object, silently breaking existing callers.
    """
    # ---bool must be tested before int because bool is an int subclass---
    if isinstance(key, bool):
        return False
    if isinstance(key, str):
        return any(ph.name == key for ph in self)
    if isinstance(key, int):
        return any(e.ph_idx == key for e in self._element.iter_ph_elms())
    # ---object membership: identity first, then equality, as the default `in` does---
    return any(member is key or member == key for member in self)

def keys(self) -> list[str]:
    """Return the ``.name`` of each placeholder, in this collection's iteration order."""
    names = []
    for placeholder in self:
        names.append(placeholder.name)
    return names

def __iter__(self):
"""Generate placeholder shapes in `idx` order."""
Expand Down
Loading
Loading