Skip to content

Commit ec14116

Browse files
committed
gh-53584: Prevent variable-nargs options from stealing required positional args
Options with nargs='?', nargs='*', or nargs='+' were greedily consuming argument strings that should have been reserved for required positional arguments. For example: parser.add_argument('--foo', nargs='?') parser.add_argument('bar') parser.parse_args(['--foo', 'abc']) # bar got nothing argparse works by pattern-matching, with the bulk of its logic defined in _parse_known_args(). That method encodes each argument string as a single character ('O' for a recognised option flag, 'A' for everything else, and '-' for strings following '--') which gives us a resulting string pattern (e.g. 'OAOA'). We then "consume" arguments using this string pattern, handling both positionals (via the consume_positionals() closure) and optionals (via consume_optional()). The bug arises in the latter. consume_optional() processes options by building a regex based on the option's nargs (e.g. '-*A?-*' for nargs='?') and then runs this regex against the remainder of the string pattern (i.e. anything following the option flag). This is done via _match_argument(). The regex will always stop at the next option flag ('O' token) but for non-fixed nargs values like '?' and '+' may greedily consume every positional ('A' token) up to that point. This fix works by manipulating the string pattern as part of our optional consumption. Any 'A' tokens that are required by remaining positionals are masked to 'O' to prevent the regex consuming them. Masking will only consider tokens up to the next option flag ('O') and it both accounts for what future options can absorb (to avoid masking more than is necessary) and ensures that at least the minimum arguments required for the optional are actually consumed. In addition, we also handle the parse_intermixed_args() case. In intermixed mode, positional-typed arguments already collected to the left of the current option are also accounted for, since they will satisfy positionals in the second-pass parse. Signed-off-by: Stephen Finucane <stephen@that.guru>
1 parent c2c4dfa commit ec14116

File tree

3 files changed

+96
-7
lines changed

3 files changed

+96
-7
lines changed

Lib/argparse.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2199,6 +2199,17 @@ def take_action(action, argument_strings, option_string=None):
21992199
if argument_values is not SUPPRESS:
22002200
action(self, namespace, argument_values, option_string)
22012201

2202+
# returns the minimum number of arg strings an action requires;
2203+
# used by consume_optional() when computing positional reservations
2204+
def min_for_action(action):
2205+
nargs = action.nargs
2206+
if nargs is None or nargs in (ONE_OR_MORE, PARSER):
2207+
return 1
2208+
elif isinstance(nargs, int):
2209+
return nargs
2210+
# OPTIONAL, ZERO_OR_MORE, REMAINDER contribute 0
2211+
return 0
2212+
22022213
# function to convert arg_strings into an optional action
22032214
def consume_optional(start_index):
22042215

@@ -2282,6 +2293,88 @@ def consume_optional(start_index):
22822293
else:
22832294
start = start_index + 1
22842295
selected_patterns = arg_strings_pattern[start:]
2296+
2297+
# Reserve the minimum number of args required by the
2298+
# remaining positionals; replace excess 'A' tokens with 'O'
2299+
# so that match_argument cannot consume them.
2300+
#
2301+
# Note that the current option can only consume 'A' tokens
2302+
# up to the next 'O' (another option flag) in the pattern
2303+
# as the regex itself stops there. Only count tokens in
2304+
# that segment -- 'A' tokens in later segments belong to
2305+
# other options' territories and are handled when those
2306+
# options are processed.
2307+
first_o = selected_patterns.find('O')
2308+
if first_o == -1:
2309+
segment, rest = selected_patterns, ''
2310+
else:
2311+
segment = selected_patterns[:first_o]
2312+
rest = selected_patterns[first_o:]
2313+
segment_arg_count = segment.count('A')
2314+
2315+
# Compute the minimum number of arg strings still required
2316+
# by the remaining positionals to avoid greedy consumption
2317+
min_pos = sum(min_for_action(pos) for pos in positionals)
2318+
if intermixed:
2319+
# In intermixed mode consume_positionals is never
2320+
# called during the main loop, so `positionals` never
2321+
# shrinks. Positional-typed tokens already collected in
2322+
# extras will satisfy the positional requirement in the
2323+
# second pass, so don't count them as still-unsatisfied
2324+
extras_arg_count = sum(
2325+
1 for c in extras_pattern if c == 'A'
2326+
)
2327+
min_pos = max(0, min_pos - extras_arg_count)
2328+
2329+
if min_pos > 0 and segment_arg_count > 0:
2330+
# Compute the maximum positional args that future
2331+
# option segments can absorb, accounting for each
2332+
# future option's own minimum nargs requirement.
2333+
pattern_end = len(arg_strings_pattern)
2334+
later_option_string_indices = sorted(
2335+
i for i in option_string_indices
2336+
if i > start_index
2337+
)
2338+
later_arg_capacity = 0
2339+
for k, later_option_string_index in enumerate(
2340+
later_option_string_indices
2341+
):
2342+
seg_start = later_option_string_index + 1
2343+
seg_stop = (
2344+
later_option_string_indices[k + 1]
2345+
if k + 1 < len(later_option_string_indices)
2346+
else pattern_end
2347+
)
2348+
seg_arg_count = arg_strings_pattern[seg_start:seg_stop].count('A')
2349+
later_option_tuples = option_string_indices[later_option_string_index]
2350+
later_action = later_option_tuples[0][0]
2351+
later_explicit_arg = later_option_tuples[0][3]
2352+
if later_explicit_arg is not None or later_action is None:
2353+
# explicit arg (--opt=val) or unknown action:
2354+
# consumes no pattern 'A' tokens
2355+
later_min_arg_count = 0
2356+
else:
2357+
later_min_arg_count = min_for_action(later_action)
2358+
later_arg_capacity += max(
2359+
0, seg_arg_count - later_min_arg_count
2360+
)
2361+
2362+
reserved_arg_count = max(0, min_pos - later_arg_capacity)
2363+
max_arg_count = max(
2364+
min_for_action(action),
2365+
segment_arg_count - reserved_arg_count,
2366+
)
2367+
if max_arg_count < segment_arg_count:
2368+
count = 0
2369+
limited = []
2370+
for c in segment:
2371+
if c == 'A':
2372+
count += 1
2373+
if count > max_arg_count:
2374+
c = 'O'
2375+
limited.append(c)
2376+
selected_patterns = ''.join(limited) + rest
2377+
22852378
arg_count = match_argument(action, selected_patterns)
22862379
stop = start + arg_count
22872380
args = arg_strings[start:stop]

Lib/test/test_argparse.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,6 @@ class TestOptionalsNargsOptional(ParserTestCase):
698698
('-z 2', NS(w=None, x=None, y='spam', z=2)),
699699
]
700700

701-
@unittest.expectedFailure
702701
def test_does_not_steal_required_positional(self):
703702
# https://github.com/python/cpython/issues/53584
704703
parser = ErrorRaisingArgumentParser()
@@ -708,7 +707,6 @@ def test_does_not_steal_required_positional(self):
708707
self.assertIsNone(args.foo)
709708
self.assertEqual(args.bar, 'abc')
710709

711-
@unittest.expectedFailure
712710
def test_does_not_steal_two_required_positionals(self):
713711
# https://github.com/python/cpython/issues/53584
714712
parser = ErrorRaisingArgumentParser()
@@ -720,7 +718,6 @@ def test_does_not_steal_two_required_positionals(self):
720718
self.assertEqual(args.bar, 'a')
721719
self.assertEqual(args.bax, 'b')
722720

723-
@unittest.expectedFailure
724721
def test_does_not_steal_two_required_positional_interspersed(self):
725722
# https://github.com/python/cpython/issues/53584
726723
parser = ErrorRaisingArgumentParser()
@@ -762,7 +759,6 @@ class TestOptionalsNargsZeroOrMore(ParserTestCase):
762759
('-y a b', NS(x=None, y=['a', 'b'])),
763760
]
764761

765-
@unittest.expectedFailure
766762
def test_does_not_steal_required_positional(self):
767763
# https://github.com/python/cpython/issues/53584
768764
parser = ErrorRaisingArgumentParser()
@@ -789,7 +785,6 @@ class TestOptionalsNargsOneOrMore(ParserTestCase):
789785
('-y a b', NS(x=None, y=['a', 'b'])),
790786
]
791787

792-
@unittest.expectedFailure
793788
def test_does_not_steal_required_positional(self):
794789
# https://github.com/python/cpython/issues/53584
795790
parser = ErrorRaisingArgumentParser()
@@ -799,7 +794,6 @@ def test_does_not_steal_required_positional(self):
799794
self.assertEqual(args.foo, ['a'])
800795
self.assertEqual(args.bar, 'b')
801796

802-
@unittest.expectedFailure
803797
def test_does_not_steal_interspersed_required_positional(self):
804798
# https://github.com/python/cpython/issues/53584
805799
parser = ErrorRaisingArgumentParser()
@@ -6849,7 +6843,6 @@ def test_invalid_args(self):
68496843
parser = ErrorRaisingArgumentParser(prog='PROG')
68506844
self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, ['a'])
68516845

6852-
@unittest.expectedFailure
68536846
def test_variable_nargs_option_before_positional(self):
68546847
# https://github.com/python/cpython/issues/53584
68556848
parser = ErrorRaisingArgumentParser()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :mod:`argparse` options with variable :ref:`nargs <nargs>` (``'?'``,
2+
``'*'``, ``'+'``) greedily consuming argument strings that should be
3+
reserved for required positional arguments.

0 commit comments

Comments
 (0)