Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions Lib/_pyrepl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,8 @@ class left(MotionCommand):
def do(self) -> None:
    """Move the cursor left by one grapheme cluster per repeat count.

    The repeat count comes from ``reader.get_arg()``.  Each step jumps to
    ``reader.prev_grapheme_boundary()`` so the cursor never stops between
    a base character and the combining marks attached to it.  Once the
    cursor is at position 0, every remaining step reports
    "start of buffer" through ``reader.error``.
    """
    r = self.reader
    for _ in range(r.get_arg()):
        if r.pos > 0:
            # Grapheme-aware step; the old unconditional ``pos - 1`` move
            # must not run as well, or the cursor would move twice.
            r.pos = r.prev_grapheme_boundary()
        else:
            self.reader.error("start of buffer")

Expand All @@ -324,9 +323,8 @@ def do(self) -> None:
r = self.reader
b = r.buffer
for _ in range(r.get_arg()):
p = r.pos + 1
if p <= len(b):
r.pos = p
if r.pos < len(b):
r.pos = r.next_grapheme_boundary()
else:
self.reader.error("end of buffer")

Expand Down Expand Up @@ -409,8 +407,9 @@ def do(self) -> None:
b = r.buffer
for i in range(r.get_arg()):
if r.pos > 0:
r.pos -= 1
del b[r.pos]
prev = r.prev_grapheme_boundary()
del b[prev:r.pos]
r.pos = prev
r.dirty = True
else:
self.reader.error("can't backspace at start")
Expand All @@ -433,7 +432,8 @@ def do(self) -> None:

for i in range(r.get_arg()):
if r.pos != len(b):
del b[r.pos]
next_pos = r.next_grapheme_boundary()
del b[r.pos:next_pos]
r.dirty = True
else:
self.reader.error("end of buffer")
Expand Down
35 changes: 35 additions & 0 deletions Lib/_pyrepl/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from __future__ import annotations

import sys
import unicodedata
import _colorize

from contextlib import contextmanager
Expand Down Expand Up @@ -458,6 +459,40 @@ def eol(self, p: int | None = None) -> int:
p += 1
return p

def prev_grapheme_boundary(self, pos: int | None = None) -> int:
"""Return the position at the start of the grapheme cluster
preceding pos (or self.pos).

For plain ASCII this is just pos - 1. For combining characters
(e.g. 'e' + U+0301 COMBINING ACUTE ACCENT) it skips the whole
cluster so that one Backspace/Left deletes the visual character.
"""
if pos is None:
pos = self.pos
bol = self.bol(pos)
if pos <= bol:
return pos
line = "".join(self.buffer[bol:pos])
# Find the last grapheme cluster in the line up to pos
*_, last = unicodedata.iter_graphemes(line) # type: ignore[attr-defined]
return bol + last.start # type: ignore[no-any-return]

def next_grapheme_boundary(self, pos: int | None = None) -> int:
"""Return the position just past the grapheme cluster starting
at pos (or self.pos).

For plain ASCII this is just pos + 1. For combining characters
it skips the whole cluster.
"""
if pos is None:
pos = self.pos
eol = self.eol(pos)
if pos >= eol:
return pos
tail = "".join(self.buffer[pos:eol])
first = next(unicodedata.iter_graphemes(tail)) # type: ignore[attr-defined]
return pos + first.end # type: ignore[no-any-return]

def max_column(self, y: int) -> int:
    """Return the last x-offset for screen line y.

    This is the first element of ``self.screeninfo[y]`` plus the sum of
    the values in its second element.
    """
    row = self.screeninfo[y]
    start = row[0]
    return start + sum(row[1])
Expand Down
10 changes: 8 additions & 2 deletions Lib/_pyrepl/readline.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,14 @@ def do(self) -> None:
if pi is not None and pi < indent:
repeat = indent - pi
break
r.pos -= repeat
del b[r.pos : r.pos + repeat]
if repeat == 1:
# Use grapheme-aware deletion for non-dedent case
prev = r.prev_grapheme_boundary()
del b[prev:r.pos]
r.pos = prev
else:
r.pos -= repeat
del b[r.pos : r.pos + repeat]
r.dirty = True
else:
self.reader.error("can't backspace at start")
Expand Down
84 changes: 84 additions & 0 deletions Lib/test/test_pyrepl/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,87 @@ def test_control_characters(self):
reader, _ = handle_all_events(events)
self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True)
self.assert_screen_equal(reader, 'flag {o}={z} {s}"🏳️\\u200d🌈"{z}'.format(**colors))

# -- grapheme cluster (combining character) tests --

def test_backspace_combining_character(self):
    # "e" followed by U+0301 (combining acute accent) is one visual
    # character, so a single Backspace is expected to empty the buffer.
    typed = code_to_events("e\u0301")
    backspace = Event(evt="key", data="backspace", raw=bytearray(b"\x7f"))
    reader, _ = handle_all_events(itertools.chain(typed, [backspace]))
    self.assertEqual(reader.buffer, [])
    self.assertEqual(reader.pos, 0)

def test_backspace_combining_in_middle(self):
    # Type "a", "e" + U+0301, "z"; step left over "z", then Backspace.
    # The whole "e\u0301" cluster should go, leaving "az" with the
    # cursor between the two remaining characters.
    typed = code_to_events("ae\u0301z")
    keys = [
        Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
        Event(evt="key", data="backspace", raw=bytearray(b"\x7f")),
    ]
    reader, _ = handle_all_events(itertools.chain(typed, keys))
    self.assertEqual(reader.buffer, ["a", "z"])
    self.assertEqual(reader.pos, 1)

def test_delete_combining_character(self):
    # With the cursor moved to the start, one Delete is expected to
    # remove the entire "e\u0301" cluster.
    typed = code_to_events("e\u0301")
    keys = [
        Event(evt="key", data="home", raw=bytearray(b"\x1b[H")),
        Event(evt="key", data="delete", raw=bytearray(b"\x7f")),
    ]
    reader, _ = handle_all_events(itertools.chain(typed, keys))
    self.assertEqual(reader.buffer, [])
    self.assertEqual(reader.pos, 0)

def test_left_skips_combining_character(self):
    # After "a" + "e\u0301", one Left must jump over both codepoints of
    # the accented character.
    typed = code_to_events("ae\u0301")
    left_key = Event(evt="key", data="left", raw=bytearray(b"\x1bOD"))
    reader, _ = handle_all_events(itertools.chain(typed, [left_key]))
    # Cursor lands before 'e', never between 'e' and its combining accent.
    self.assertEqual(reader.pos, 1)

def test_right_skips_combining_character(self):
    # From the start of "e\u0301z", one Right must treat "e\u0301" as a
    # single unit.
    typed = code_to_events("e\u0301z")
    keys = [
        Event(evt="key", data="home", raw=bytearray(b"\x1b[H")),
        Event(evt="key", data="right", raw=bytearray(b"\x1bOC")),
    ]
    reader, _ = handle_all_events(itertools.chain(typed, keys))
    # Cursor is past 'e' and its combining accent, just before 'z'.
    self.assertEqual(reader.pos, 2)

def test_backspace_plain_ascii(self):
    # Regression guard: on plain ASCII, Backspace still removes exactly
    # one character.
    typed = code_to_events("abc")
    backspace = Event(evt="key", data="backspace", raw=bytearray(b"\x7f"))
    reader, _ = handle_all_events(itertools.chain(typed, [backspace]))
    self.assertEqual(reader.buffer, ["a", "b"])
    self.assertEqual(reader.pos, 2)

def test_left_right_plain_ascii(self):
    # Regression guard: on plain ASCII, Left and Right still move one
    # character per key press (3 -> 2 -> 1 -> 2).
    typed = code_to_events("abc")
    keys = [
        Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
        Event(evt="key", data="left", raw=bytearray(b"\x1bOD")),
        Event(evt="key", data="right", raw=bytearray(b"\x1bOC")),
    ]
    reader, _ = handle_all_events(itertools.chain(typed, keys))
    self.assertEqual(reader.pos, 2)
Loading