From 8d486128ec9be06b265b6c8124f7b8b839540662 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 15:58:26 +0100
Subject: [PATCH 01/10] Prebuild and cache inline image XML

Avoid calling python-docx per-image by generating a CT_Inline-based XML template once (sentinel values are swapped for format placeholders at import time) and using str.format() to fill those placeholders, keeping compatibility with the installed python-docx. Add caching of generated image XML per (part, descriptor, width, height) to skip repeated I/O, SHA1 work and header parsing. Use package.get_or_add_image_part and relate_to with RT.IMAGE, compute scaled_dimensions, assign shape_id from docx_ids_index, and xml-escape filenames. Also add a _image_cache dict on DocxTemplate and adjust hyperlink handling to use the local part variable.
---
 docxtpl/inline_image.py | 98 ++++++++++++++++++++++++++++++++++++++---
 docxtpl/template.py     |  1 +
 2 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index f860749..781976b 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -4,8 +4,62 @@
 
 @author: Eric Lapouyade
 """
+from xml.sax.saxutils import escape as xml_escape
+
+from docx.opc.constants import RELATIONSHIP_TYPE as RT
 from docx.oxml import OxmlElement, parse_xml
 from docx.oxml.ns import qn
+from docx.oxml.shape import CT_Inline
+from docx.shared import Emu
+
+
+def _build_inline_image_xml_template():
+    """Generate the XML format string by calling python-docx with sentinel values.
+
+    This ensures the template always matches the installed python-docx version's
+    XML structure, even after upgrades. We call CT_Inline.new_pic_inline() once
+    with recognizable sentinel values, serialize to XML, then replace the
+    sentinels with Python format placeholders.
+    """
+    import uuid
+
+    # Use GUIDs for string sentinels - guaranteed no collision with XML content
+    _RID_SENTINEL = str(uuid.uuid4())
+    _FILENAME_SENTINEL = str(uuid.uuid4())
+
+    # For numeric sentinels, use unique integers derived from UUIDs.
+    # shape_id is xsd:unsignedInt (max 4,294,967,295 / 32-bit).
+    # cx/cy are EMU values typed as xsd:long (64-bit).
+    # All use 9-digit range [100000000, 999999999] to stay within 32-bit
+    # and avoid any accidental collisions with each other.
+    _SHAPE_ID = uuid.uuid4().int % (9 * 10**8) + 10**8
+    _CX_INT = uuid.uuid4().int % (9 * 10**8) + 10**8
+    _CY_INT = uuid.uuid4().int % (9 * 10**8) + 10**8
+
+    inline = CT_Inline.new_pic_inline(
+        _SHAPE_ID,
+        _RID_SENTINEL,
+        _FILENAME_SENTINEL,
+        Emu(_CX_INT),
+        Emu(_CY_INT),
+    )
+    xml = inline.xml
+
+    # Replace sentinel values with format string placeholders
+    xml = xml.replace(str(_SHAPE_ID), "{shape_id}")
+    xml = xml.replace(_RID_SENTINEL, "{rId}")
+    xml = xml.replace(_FILENAME_SENTINEL, "{filename}")
+    xml = xml.replace(str(_CX_INT), "{cx}")
+    xml = xml.replace(str(_CY_INT), "{cy}")
+
+    return xml
+
+
+# Pre-built XML template for inline images, derived from the installed
+# python-docx version. Using str.format() on this template avoids calling
+# CT_Inline.new_pic_inline() per image (which does 2x parse_xml() +
+# element manipulation + .xml serialization each time).
+_INLINE_IMAGE_XML = _build_inline_image_xml_template()
 
 
 class InlineImage(object):
@@ -50,16 +104,48 @@ def _add_hyperlink(self, run, url, part):
         return run
 
     def _insert_image(self):
-        pic = self.tpl.current_rendering_part.new_pic_inline(
-            self.image_descriptor,
-            self.width,
-            self.height,
-        ).xml
+        part = self.tpl.current_rendering_part
+        image_descriptor = self.image_descriptor
+
+        # Cache generated XML per (part, descriptor, width, height) to avoid
+        # repeated file I/O, SHA1 computation, and header parsing.
+        cache = self.tpl._image_cache
+        cache_key = (id(part), image_descriptor, self.width, self.height)
+
+        if cache_key in cache:
+            pic = cache[cache_key]
+        else:
+            # Get or add the image part (handles deduplication via SHA1 internally)
+            package = part._package
+            image_part = package.get_or_add_image_part(image_descriptor)
+            rId = part.relate_to(image_part, RT.IMAGE)
+            image = image_part.image
+            cx, cy = image.scaled_dimensions(self.width, self.height)
+
+            # Assign shape_id from a simple counter. python-docx's
+            # new_pic_inline() would call its next_id property which does an
+            # XPath("//@id") over the entire XML tree on every call - but we
+            # bypass that entirely by generating the XML ourselves.
+            # fix_docpr_ids() renumbers all IDs after rendering anyway.
+            self.tpl.docx_ids_index += 1
+            shape_id = self.tpl.docx_ids_index
+
+            # Generate XML directly as a string using a pre-built template
+            # rather than calling CT_Inline.new_pic_inline() per image.
+            pic = _INLINE_IMAGE_XML.format(
+                cx=int(cx),
+                cy=int(cy),
+                shape_id=shape_id,
+                filename=xml_escape(image.filename),
+                rId=rId,
+            )
+            cache[cache_key] = pic
+
         if self.anchor:
             run = parse_xml(pic)
             if run.xpath(".//a:blip"):
                 hyperlink = self._add_hyperlink(
-                    run, self.anchor, self.tpl.current_rendering_part
+                    run, self.anchor, part
                 )
                 pic = hyperlink.xml
 
diff --git a/docxtpl/template.py b/docxtpl/template.py
index abcff49..a0d325b 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -171,6 +171,7 @@ def render_init(self):
         self.pic_map = {}
         self.current_rendering_part = None
         self.docx_ids_index = 1000
+        self._image_cache = {}
         self.is_saved = False
 
     def __getattr__(self, name):

From ddf1687f9dbf592199e4aff6cc52e541455d3616 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 16:25:48 +0100
Subject: [PATCH 02/10] Optimize image part deduplication

Add an O(1) SHA1 index for image parts and a fast _get_or_add_image_part helper on DocxTemplate to avoid python-docx's O(n) linear scan and repeated SHA1 recomputation. Initialize the index from render_init() via _init_image_parts_index(), seed it from existing image parts, and maintain a sequential partname counter to prevent partname collisions. Update InlineImage to call tpl._get_or_add_image_part (which returns (image_part, image)) instead of package.get_or_add_image_part, and use the returned Image object. This improves performance and reduces redundant SHA1 work when inserting/looking up images.
---
 docxtpl/inline_image.py |  7 +++---
 docxtpl/template.py     | 55 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 781976b..7d353ad 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -115,11 +115,10 @@ def _insert_image(self):
         if cache_key in cache:
             pic = cache[cache_key]
         else:
-            # Get or add the image part (handles deduplication via SHA1 internally)
-            package = part._package
-            image_part = package.get_or_add_image_part(image_descriptor)
+            # Get or add the image part with O(1) SHA1 deduplication,
+            # avoiding the O(n) linear scan and SHA1 recomputation per lookup.
+            image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
-            image = image_part.image
             cx, cy = image.scaled_dimensions(self.width, self.height)
 
             # Assign shape_id from a simple counter. python-docx's
diff --git a/docxtpl/template.py b/docxtpl/template.py
index a0d325b..69eb2f7 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -173,6 +173,61 @@ def render_init(self):
         self.docx_ids_index = 1000
         self._image_cache = {}
         self.is_saved = False
+        self._init_image_parts_index()
+
+    def _init_image_parts_index(self):
+        """Build an O(1) SHA1 index of existing image parts in the package.
+
+        This enables fast deduplication in _get_or_add_image_part(), avoiding
+        the O(n) linear scan and repeated SHA1 recomputation that occurs in
+        the default python-docx image-part lookup.
+        """
+        package = self.docx._part._package
+        image_parts = package.image_parts
+
+        # Seed the index from existing image parts in the template.
+        # ImagePart.sha1 recomputes on each access, but this is a one-time
+        # cost for the (typically few) images already in the template.
+        self._image_sha1_index = {}
+        for ip in image_parts:
+            self._image_sha1_index[ip.sha1] = ip
+
+        # Start the partname counter after all existing image parts to avoid
+        # collisions with partnames already in the package.
+        self._image_part_counter = len(image_parts._image_parts)
+
+    def _get_or_add_image_part(self, image_descriptor):
+        """Return (image_part, image) for the given image_descriptor.
+
+        Performs the same function as python-docx's
+        Package.get_or_add_image_part() but with O(1) SHA1 deduplication
+        (instead of O(n) linear scan with repeated SHA1 recomputation) and
+        sequential partname assignment (instead of O(n²) gap-search).
+        """
+        from docx.image.image import Image
+        from docx.opc.packuri import PackURI
+        from docx.parts.image import ImagePart
+
+        image = Image.from_file(image_descriptor)
+        sha1 = image.sha1  # @lazyproperty — computed once per Image object
+
+        image_part = self._image_sha1_index.get(sha1)
+        if image_part is not None:
+            return image_part, image
+
+        # New unique image — create part with sequential partname
+        self._image_part_counter += 1
+        partname = PackURI(
+            "/word/media/image%d.%s" % (self._image_part_counter, image.ext)
+        )
+        image_part = ImagePart.from_image(image, partname)
+
+        # Add to the package collection and the SHA1 index
+        package = self.docx._part._package
+        package.image_parts.append(image_part)
+        self._image_sha1_index[sha1] = image_part
+
+        return image_part, image
 
     def __getattr__(self, name):
         return getattr(self.docx, name)

From 4a96bc4b5b505812a15736538b98e9cf140299e0 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 16:51:28 +0100
Subject: [PATCH 03/10] Use descriptor cache for image deduplication

Replace the SHA1-based image-part index with a descriptor-keyed cache (_image_descriptor_index) to deduplicate images by file-path (O(1)) and avoid expensive SHA1 hashing. For string path descriptors the cache is used to return existing (image_part, image) tuples; non-string descriptors (e.g. file-like objects) fall back to always creating a new part. Keeps sequential partname assignment and appends new ImagePart to the package; caches the result for string descriptors. This improves performance when adding many images (e.g. large photos) by eliminating repeated SHA1 computation.
---
 docxtpl/inline_image.py |  6 +++---
 docxtpl/template.py     | 47 +++++++++++++++++++++--------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 7d353ad..3c69168 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -108,15 +108,15 @@ def _insert_image(self):
         image_descriptor = self.image_descriptor
 
         # Cache generated XML per (part, descriptor, width, height) to avoid
-        # repeated file I/O, SHA1 computation, and header parsing.
+        # repeated file I/O, image hashing, and header parsing.
         cache = self.tpl._image_cache
         cache_key = (id(part), image_descriptor, self.width, self.height)
 
         if cache_key in cache:
             pic = cache[cache_key]
         else:
-            # Get or add the image part with O(1) SHA1 deduplication,
-            # avoiding the O(n) linear scan and SHA1 recomputation per lookup.
+            # Get or add the image part with O(1) descriptor-based dedup,
+            # avoiding the O(n) linear scan in python-docx's default path.
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
diff --git a/docxtpl/template.py b/docxtpl/template.py
index 69eb2f7..9e9faaf 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -176,21 +176,17 @@ def render_init(self):
         self._init_image_parts_index()
 
     def _init_image_parts_index(self):
-        """Build an O(1) SHA1 index of existing image parts in the package.
+        """Initialize image-part tracking for fast insertion.
 
-        This enables fast deduplication in _get_or_add_image_part(), avoiding
-        the O(n) linear scan and repeated SHA1 recomputation that occurs in
-        the default python-docx image-part lookup.
+        Uses a descriptor-keyed cache (file path string) for O(1) dedup of
+        images added during rendering, avoiding expensive content hashing.
         """
         package = self.docx._part._package
         image_parts = package.image_parts
 
-        # Seed the index from existing image parts in the template.
-        # ImagePart.sha1 recomputes on each access, but this is a one-time
-        # cost for the (typically few) images already in the template.
-        self._image_sha1_index = {}
-        for ip in image_parts:
-            self._image_sha1_index[ip.sha1] = ip
+        # Descriptor-keyed cache: maps image_descriptor -> (image_part, image)
+        # This is the primary dedup mechanism and avoids expensive content hashing.
+        self._image_descriptor_index = {}
 
         # Start the partname counter after all existing image parts to avoid
         # collisions with partnames already in the package.
@@ -199,35 +195,40 @@ def _init_image_parts_index(self):
     def _get_or_add_image_part(self, image_descriptor):
         """Return (image_part, image) for the given image_descriptor.
 
-        Performs the same function as python-docx's
-        Package.get_or_add_image_part() but with O(1) SHA1 deduplication
-        (instead of O(n) linear scan with repeated SHA1 recomputation) and
-        sequential partname assignment (instead of O(n²) gap-search).
+        Uses the descriptor itself (file path) as the dedup key, avoiding
+        expensive content hashing.  Falls back to always creating a new part
+        for non-hashable descriptors (file-like objects).
         """
         from docx.image.image import Image
         from docx.opc.packuri import PackURI
         from docx.parts.image import ImagePart
 
-        image = Image.from_file(image_descriptor)
-        sha1 = image.sha1  # @lazyproperty — computed once per Image object
+        # For string paths, use the path as a cheap dedup key.
+        cache_key = image_descriptor if isinstance(image_descriptor, str) else None
+
+        if cache_key is not None:
+            cached = self._image_descriptor_index.get(cache_key)
+            if cached is not None:
+                return cached
 
-        image_part = self._image_sha1_index.get(sha1)
-        if image_part is not None:
-            return image_part, image
+        image = Image.from_file(image_descriptor)
 
-        # New unique image — create part with sequential partname
+        # Create image part with sequential partname
         self._image_part_counter += 1
         partname = PackURI(
             "/word/media/image%d.%s" % (self._image_part_counter, image.ext)
         )
         image_part = ImagePart.from_image(image, partname)
 
-        # Add to the package collection and the SHA1 index
+        # Add to the package collection
         package = self.docx._part._package
         package.image_parts.append(image_part)
-        self._image_sha1_index[sha1] = image_part
 
-        return image_part, image
+        result = (image_part, image)
+        if cache_key is not None:
+            self._image_descriptor_index[cache_key] = result
+
+        return result
 
     def __getattr__(self, name):
         return getattr(self.docx, name)

From 98d8aba7b63b2f20be808d1017701eaf2665a324 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:39:45 +0100
Subject: [PATCH 04/10] Cache image metadata instead of XML

Cache only the expensive image metadata (rId, dimensions, filename) per (part, descriptor, width, height) instead of the full inline XML. A fresh shape_id is now assigned for every insertion so drawing IDs remain unique (important for headers/footers/footnotes which aren't renumbered by fix_docpr_ids()). This preserves performance benefits (avoids repeated image part lookup, hashing and header parsing) while preventing duplicate drawing IDs; cx/cy are stored as ints and filename is xml-escaped when cached.
---
 docxtpl/inline_image.py | 45 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 3c69168..10441f0 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -107,38 +107,39 @@ def _insert_image(self):
         part = self.tpl.current_rendering_part
         image_descriptor = self.image_descriptor
 
-        # Cache generated XML per (part, descriptor, width, height) to avoid
-        # repeated file I/O, image hashing, and header parsing.
+        # Cache the expensive parts (image part lookup, rId, dimensions) per
+        # (part, descriptor, width, height).  The XML string itself is NOT
+        # cached because each insertion needs a unique shape_id - header/footer
+        # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
         cache_key = (id(part), image_descriptor, self.width, self.height)
 
         if cache_key in cache:
-            pic = cache[cache_key]
+            rId, cx, cy, filename = cache[cache_key]
         else:
             # Get or add the image part with O(1) descriptor-based dedup,
             # avoiding the O(n) linear scan in python-docx's default path.
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-
-            # Assign shape_id from a simple counter. python-docx's
-            # new_pic_inline() would call its next_id property which does an
-            # XPath("//@id") over the entire XML tree on every call - but we
-            # bypass that entirely by generating the XML ourselves.
-            # fix_docpr_ids() renumbers all IDs after rendering anyway.
-            self.tpl.docx_ids_index += 1
-            shape_id = self.tpl.docx_ids_index
-
-            # Generate XML directly as a string using a pre-built template
-            # rather than calling CT_Inline.new_pic_inline() per image.
-            pic = _INLINE_IMAGE_XML.format(
-                cx=int(cx),
-                cy=int(cy),
-                shape_id=shape_id,
-                filename=xml_escape(image.filename),
-                rId=rId,
-            )
-            cache[cache_key] = pic
+            filename = xml_escape(image.filename)
+            cache[cache_key] = (rId, int(cx), int(cy), filename)
+
+        # Always assign a fresh shape_id per insertion so that drawing IDs
+        # are unique in every part (including headers/footers/footnotes
+        # which are not renumbered by fix_docpr_ids()).
+        self.tpl.docx_ids_index += 1
+        shape_id = self.tpl.docx_ids_index
+
+        # Generate XML directly as a string using a pre-built template
+        # rather than calling CT_Inline.new_pic_inline() per image.
+        pic = _INLINE_IMAGE_XML.format(
+            cx=int(cx),
+            cy=int(cy),
+            shape_id=shape_id,
+            filename=filename,
+            rId=rId,
+        )
 
         if self.anchor:
             run = parse_xml(pic)

From e4886535593541d6ee86443d28334dee12dd11a4 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:46:16 +0100
Subject: [PATCH 05/10] Handle non-hashable descriptors; escape quotes

Use id() for non-string image descriptors (e.g. file-like objects) when building the image cache key to avoid a TypeError on dict lookup with unhashable descriptors. Also escape double quotes in image filenames for XML attribute usage by passing a mapping to xml_escape so quotes become &quot;. Cache semantics and per-insertion shape_id assignment are otherwise unchanged.
---
 docxtpl/inline_image.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 10441f0..3a207be 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -112,7 +112,10 @@ def _insert_image(self):
         # cached because each insertion needs a unique shape_id - header/footer
         # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
-        cache_key = (id(part), image_descriptor, self.width, self.height)
+        # Use id() for non-hashable descriptors (file-like objects) to avoid
+        # TypeError on dict lookup.
+        desc_key = image_descriptor if isinstance(image_descriptor, str) else id(image_descriptor)
+        cache_key = (id(part), desc_key, self.width, self.height)
 
         if cache_key in cache:
             rId, cx, cy, filename = cache[cache_key]
@@ -122,7 +125,8 @@ def _insert_image(self):
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-            filename = xml_escape(image.filename)
+            # Escape for use inside XML attribute (quotes must be escaped)
+            filename = xml_escape(image.filename, {'"': "&quot;"})
             cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs

From 7c52c563f74e6ed7ed631213d6b0d126cffeecd3 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:49:15 +0100
Subject: [PATCH 06/10] Scan image partnames to derive counter

Avoid using len() of image parts to pick the next image partname index, which could collide when numbering is non-contiguous. Instead scan existing image partnames (using partname.baseURI when available, otherwise str(partname)), extract numeric suffixes with a regex (/image(\d+)\.), track the maximum index, and set the image part counter to that max. This ensures new image partnames won't reuse an already-present index.
---
 docxtpl/template.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 9e9faaf..c63a2d3 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -188,9 +188,19 @@ def _init_image_parts_index(self):
         # This is the primary dedup mechanism and avoids expensive content hashing.
         self._image_descriptor_index = {}
 
-        # Start the partname counter after all existing image parts to avoid
-        # collisions with partnames already in the package.
-        self._image_part_counter = len(image_parts._image_parts)
+        # Derive the next partname index by scanning existing partnames once.
+        # Using len() alone would collide with non-contiguous numbering
+        # (e.g. image1.png + image3.png → len=2 → next would be image3.ext).
+        max_index = 0
+        for ip in image_parts:
+            # Partnames follow /word/media/imageN.ext pattern
+            name = ip.partname.baseURI if hasattr(ip.partname, 'baseURI') else str(ip.partname)
+            m = re.search(r'/image(\d+)\.', name)
+            if m:
+                idx = int(m.group(1))
+                if idx > max_index:
+                    max_index = idx
+        self._image_part_counter = max_index
 
     def _get_or_add_image_part(self, image_descriptor):
         """Return (image_part, image) for the given image_descriptor.

From 7581a333ec77046ab7e87bffeac0e78defd9b82d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:55:58 +0100
Subject: [PATCH 07/10] Always use str(partname) for image parts

Replace conditional use of partname.baseURI with a direct str(partname) conversion when iterating image parts. This makes the code rely on a consistent string representation for part names (used by the /imageN.ext regex) and avoids depending on the presence of a baseURI attribute across different part implementations.
---
 docxtpl/template.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index c63a2d3..078d172 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -194,7 +194,7 @@ def _init_image_parts_index(self):
         max_index = 0
         for ip in image_parts:
             # Partnames follow /word/media/imageN.ext pattern
-            name = ip.partname.baseURI if hasattr(ip.partname, 'baseURI') else str(ip.partname)
+            name = str(ip.partname)
             m = re.search(r'/image(\d+)\.', name)
             if m:
                 idx = int(m.group(1))

From 82fd69c73314c005654a84998cd802964d0c1f8d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:59:06 +0100
Subject: [PATCH 08/10] Initialize docx_ids_index from existing docPr ids

Replace the hardcoded docx_ids_index initialization with a routine that scans all package parts (body, headers, footers, footnotes) for wp:docPr elements and sets the counter to the maximum found id (minimum 1000); because the counter is incremented before each use, every newly assigned id is above all existing ones. This prevents id collisions when inserting new drawings into parts that were not renumbered by fix_docpr_ids. The new method is called from render_init() and safely skips non-XML or unreadable parts.
---
 docxtpl/template.py | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 078d172..cf339df 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -170,10 +170,48 @@ def render_init(self):
         self.init_docx()
         self.pic_map = {}
         self.current_rendering_part = None
-        self.docx_ids_index = 1000
         self._image_cache = {}
         self.is_saved = False
         self._init_image_parts_index()
+        self._init_docx_ids_index()
+
+    def _init_docx_ids_index(self):
+        """Set docx_ids_index above the maximum existing wp:docPr id.
+
+        fix_docpr_ids() only renumbers the body tree, so IDs in headers,
+        footers, and footnotes retain their original values. Starting the
+        counter above the global maximum prevents collisions when inserting
+        new drawings into any part.
+        """
+        import docx.oxml.ns as _ns
+        wp_ns = _ns.nsmap['wp']
+        tag = "{%s}docPr" % wp_ns
+        max_id = 0
+
+        # Scan all parts (body + headers + footers + footnotes)
+        for part in self.docx._part._package.parts:
+            if not hasattr(part, 'blob') or part.blob is None:
+                continue
+            # Only scan XML parts that could contain drawings
+            ct = getattr(part, 'content_type', '')
+            if not ct.startswith('application/vnd.openxmlformats-officedocument'):
+                continue
+            try:
+                tree = etree.fromstring(part.blob)
+            except Exception:
+                continue
+            for elt in tree.iter(tag):
+                id_val = elt.get('id')
+                if id_val is not None:
+                    try:
+                        val = int(id_val)
+                        if val > max_id:
+                            max_id = val
+                    except ValueError:
+                        pass
+
+        # Start above the highest existing ID (minimum 1000 for safety)
+        self.docx_ids_index = max(max_id, 1000)
 
     def _init_image_parts_index(self):
         """Initialize image-part tracking for fast insertion.

From ef56632b1938690db98ee9b5cf2c2fe7a7eb34e4 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 18:05:16 +0100
Subject: [PATCH 09/10] Normalize None image filename before escaping

Treat image.filename == None (e.g., BytesIO/file-like descriptors) as an empty string before calling xml_escape so XML attribute generation matches python-docx behavior. Added a clarifying comment and ensure the escaped filename is stored in the cache to avoid None-related issues when rendering.
---
 docxtpl/inline_image.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 3a207be..684b5ce 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -125,8 +125,10 @@ def _insert_image(self):
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-            # Escape for use inside XML attribute (quotes must be escaped)
-            filename = xml_escape(image.filename, {'"': "&quot;"})
+            # Escape for use inside XML attribute (quotes must be escaped).
+            # image.filename is None for file-like descriptors (BytesIO);
+            # normalize to empty string to match python-docx's behavior.
+            filename = xml_escape(image.filename or "", {'"': "&quot;"})
             cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs

From f316ca8a4b944ce83ea96cdb6990559a71d23f8d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 18:15:13 +0100
Subject: [PATCH 10/10] Skip caching unhashable image descriptors

Only build and use a cache key when the image_descriptor is hashable. Previously id() was used for non-hashable descriptors (e.g. file-like objects), which could risk aliasing after GC and lead to incorrect deduplication. Now the code attempts to construct a cache key with the descriptor and falls back to skipping caching for unhashable descriptors; cache entries are only read/written when a valid cache_key exists. Filename normalization and per-insertion shape_id behavior are unchanged.
---
 docxtpl/inline_image.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 684b5ce..da35bbd 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -112,12 +112,16 @@ def _insert_image(self):
         # cached because each insertion needs a unique shape_id - header/footer
         # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
-        # Use id() for non-hashable descriptors (file-like objects) to avoid
-        # TypeError on dict lookup.
-        desc_key = image_descriptor if isinstance(image_descriptor, str) else id(image_descriptor)
-        cache_key = (id(part), desc_key, self.width, self.height)
-
-        if cache_key in cache:
+        # For hashable descriptors (strings, paths), cache by value.
+        # For unhashable descriptors (file-like objects), skip caching
+        # entirely — using id() would risk aliasing after GC.
+        try:
+            cache_key = (id(part), image_descriptor, self.width, self.height)
+            hash(cache_key) is not None  # trigger TypeError if unhashable
+        except TypeError:
+            cache_key = None
+
+        if cache_key is not None and cache_key in cache:
             rId, cx, cy, filename = cache[cache_key]
         else:
             # Get or add the image part with O(1) descriptor-based dedup,
@@ -129,7 +133,8 @@ def _insert_image(self):
             # image.filename is None for file-like descriptors (BytesIO);
             # normalize to empty string to match python-docx's behavior.
             filename = xml_escape(image.filename or "", {'"': "&quot;"})
-            cache[cache_key] = (rId, int(cx), int(cy), filename)
+            if cache_key is not None:
+                cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs
         # are unique in every part (including headers/footers/footnotes