gh-126374: Add support of options with optional arguments in the getopt module (GH-126375)

serhiy-storchaka · ebonnal · commit f41632c702f1 · 2025-01-10T23:56:29.000+01:00
diff --git a/Doc/library/getopt.rst b/Doc/library/getopt.rst
@@ -38,7 +38,8 @@ exception:
    be parsed, without the leading reference to the running program. Typically, this
    means ``sys.argv[1:]``. *shortopts* is the string of option letters that the
    script wants to recognize, with options that require an argument followed by a
-   colon (``':'``; i.e., the same format that Unix :c:func:`!getopt` uses).
+   colon (``':'``) and options that accept an optional argument followed by
+   two colons (``'::'``); i.e., the same format that Unix :c:func:`!getopt` uses.
 
    .. note::
 
@@ -49,8 +50,10 @@ exception:
    *longopts*, if specified, must be a list of strings with the names of the
    long options which should be supported.  The leading ``'--'`` characters
    should not be included in the option name.  Long options which require an
-   argument should be followed by an equal sign (``'='``).  Optional arguments
-   are not supported.  To accept only long options, *shortopts* should be an
+   argument should be followed by an equal sign (``'='``).
+   Long options which accept an optional argument should be followed by
+   an equal sign and question mark (``'=?'``).
+   To accept only long options, *shortopts* should be an
    empty string.  Long options on the command line can be recognized so long as
    they provide a prefix of the option name that matches exactly one of the
    accepted options.  For example, if *longopts* is ``['foo', 'frob']``, the
@@ -67,6 +70,9 @@ exception:
    options occur in the list in the same order in which they were found, thus
    allowing multiple occurrences.  Long and short options may be mixed.
 
+   .. versionchanged:: 3.14
+      Optional arguments are supported.
+
 
 .. function:: gnu_getopt(args, shortopts, longopts=[])
 
@@ -124,6 +130,20 @@ Using long option names is equally easy:
    >>> args
    ['a1', 'a2']
 
+Optional arguments must be attached to the option (``-Con``, ``--color=off``); a separate following word is not consumed:
+
+.. doctest::
+
+   >>> s = '-Con -C --color=off --color a1 a2'
+   >>> args = s.split()
+   >>> args
+   ['-Con', '-C', '--color=off', '--color', 'a1', 'a2']
+   >>> optlist, args = getopt.getopt(args, 'C::', ['color=?'])
+   >>> optlist
+   [('-C', 'on'), ('-C', ''), ('--color', 'off'), ('--color', '')]
+   >>> args
+   ['a1', 'a2']
+
 In a script, typical usage is something like this:
 
 .. testcode::
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
@@ -314,6 +314,11 @@ functools
   to reserve a place for positional arguments.
   (Contributed by Dominykas Grigonis in :gh:`119127`.)
 
+getopt
+------
+
+* Add support for options with optional arguments.
+  (Contributed by Serhiy Storchaka in :gh:`126374`.)
 
 http
 ----
diff --git a/Lib/getopt.py b/Lib/getopt.py
@@ -27,7 +27,6 @@
 # - allow the caller to specify ordering
 # - RETURN_IN_ORDER option
 # - GNU extension with '-' as first character of option string
-# - optional arguments, specified by double colons
 # - an option string with a W followed by semicolon should
 #   treat "-W foo" as "--foo"
 
@@ -58,12 +57,14 @@ def getopt(args, shortopts, longopts = []):
     running program.  Typically, this means "sys.argv[1:]".  shortopts
     is the string of option letters that the script wants to
     recognize, with options that require an argument followed by a
-    colon (i.e., the same format that Unix getopt() uses).  If
+    colon and options that accept an optional argument followed by
+    two colons (i.e., the same format that Unix getopt() uses).  If
     specified, longopts is a list of strings with the names of the
     long options which should be supported.  The leading '--'
     characters should not be included in the option name.  Options
     which require an argument should be followed by an equal sign
-    ('=').
+    ('=').  Options which accept an optional argument should be
+    followed by an equal sign and question mark ('=?').
 
     The return value consists of two elements: the first is a list of
     (option, value) pairs; the second is the list of program arguments
@@ -153,7 +154,7 @@ def do_longs(opts, opt, longopts, args):
 
     has_arg, opt = long_has_args(opt, longopts)
     if has_arg:
-        if optarg is None:
+        if optarg is None and has_arg != '?':
             if not args:
                 raise GetoptError(_('option --%s requires argument') % opt, opt)
             optarg, args = args[0], args[1:]
@@ -174,13 +175,17 @@ def long_has_args(opt, longopts):
         return False, opt
     elif opt + '=' in possibilities:
         return True, opt
+    elif opt + '=?' in possibilities:
+        return '?', opt
     # No exact match, so better be unique.
     if len(possibilities) > 1:
         # XXX since possibilities contains all valid continuations, might be
         # nice to work them into the error msg
         raise GetoptError(_('option --%s not a unique prefix') % opt, opt)
     assert len(possibilities) == 1
     unique_match = possibilities[0]
+    if unique_match.endswith('=?'):
+        return '?', unique_match[:-2]
     has_arg = unique_match.endswith('=')
     if has_arg:
         unique_match = unique_match[:-1]
@@ -189,8 +194,9 @@ def long_has_args(opt, longopts):
 def do_shorts(opts, optstring, shortopts, args):
     while optstring != '':
         opt, optstring = optstring[0], optstring[1:]
-        if short_has_arg(opt, shortopts):
-            if optstring == '':
+        has_arg = short_has_arg(opt, shortopts)
+        if has_arg:
+            if optstring == '' and has_arg != '?':
                 if not args:
                     raise GetoptError(_('option -%s requires argument') % opt,
                                       opt)
@@ -204,7 +210,11 @@ def do_shorts(opts, optstring, shortopts, args):
 def short_has_arg(opt, shortopts):
     for i in range(len(shortopts)):
         if opt == shortopts[i] != ':':
-            return shortopts.startswith(':', i+1)
+            if not shortopts.startswith(':', i+1):
+                return False
+            if shortopts.startswith('::', i+1):
+                return '?'
+            return True
     raise GetoptError(_('option -%s not recognized') % opt, opt)
 
 if __name__ == '__main__':
diff --git a/Lib/test/test_getopt.py b/Lib/test/test_getopt.py
@@ -19,21 +19,34 @@ def assertError(self, *args, **kwargs):
         self.assertRaises(getopt.GetoptError, *args, **kwargs)
 
     def test_short_has_arg(self):
-        self.assertTrue(getopt.short_has_arg('a', 'a:'))
-        self.assertFalse(getopt.short_has_arg('a', 'a'))
+        self.assertIs(getopt.short_has_arg('a', 'a:'), True)
+        self.assertIs(getopt.short_has_arg('a', 'a'), False)
+        self.assertEqual(getopt.short_has_arg('a', 'a::'), '?')
         self.assertError(getopt.short_has_arg, 'a', 'b')
 
     def test_long_has_args(self):
         has_arg, option = getopt.long_has_args('abc', ['abc='])
-        self.assertTrue(has_arg)
+        self.assertIs(has_arg, True)
         self.assertEqual(option, 'abc')
 
         has_arg, option = getopt.long_has_args('abc', ['abc'])
-        self.assertFalse(has_arg)
+        self.assertIs(has_arg, False)
         self.assertEqual(option, 'abc')
 
+        has_arg, option = getopt.long_has_args('abc', ['abc=?'])
+        self.assertEqual(has_arg, '?')
+        self.assertEqual(option, 'abc')
+
+        has_arg, option = getopt.long_has_args('abc', ['abcd='])
+        self.assertIs(has_arg, True)
+        self.assertEqual(option, 'abcd')
+
         has_arg, option = getopt.long_has_args('abc', ['abcd'])
-        self.assertFalse(has_arg)
+        self.assertIs(has_arg, False)
+        self.assertEqual(option, 'abcd')
+
+        has_arg, option = getopt.long_has_args('abc', ['abcd=?'])
+        self.assertEqual(has_arg, '?')
         self.assertEqual(option, 'abcd')
 
         self.assertError(getopt.long_has_args, 'abc', ['def'])
@@ -49,9 +62,9 @@ def test_do_shorts(self):
         self.assertEqual(opts, [('-a', '1')])
         self.assertEqual(args, [])
 
-        #opts, args = getopt.do_shorts([], 'a=1', 'a:', [])
-        #self.assertEqual(opts, [('-a', '1')])
-        #self.assertEqual(args, [])
+        opts, args = getopt.do_shorts([], 'a=1', 'a:', [])
+        self.assertEqual(opts, [('-a', '=1')])
+        self.assertEqual(args, [])
 
         opts, args = getopt.do_shorts([], 'a', 'a:', ['1'])
         self.assertEqual(opts, [('-a', '1')])
@@ -61,6 +74,14 @@ def test_do_shorts(self):
         self.assertEqual(opts, [('-a', '1')])
         self.assertEqual(args, ['2'])
 
+        opts, args = getopt.do_shorts([], 'a', 'a::', ['1'])
+        self.assertEqual(opts, [('-a', '')])
+        self.assertEqual(args, ['1'])
+
+        opts, args = getopt.do_shorts([], 'a1', 'a::', [])
+        self.assertEqual(opts, [('-a', '1')])
+        self.assertEqual(args, [])
+
         self.assertError(getopt.do_shorts, [], 'a1', 'a', [])
         self.assertError(getopt.do_shorts, [], 'a', 'a:', [])
 
@@ -77,6 +98,22 @@ def test_do_longs(self):
         self.assertEqual(opts, [('--abcd', '1')])
         self.assertEqual(args, [])
 
+        opts, args = getopt.do_longs([], 'abc', ['abc=?'], ['1'])
+        self.assertEqual(opts, [('--abc', '')])
+        self.assertEqual(args, ['1'])
+
+        opts, args = getopt.do_longs([], 'abc', ['abcd=?'], ['1'])
+        self.assertEqual(opts, [('--abcd', '')])
+        self.assertEqual(args, ['1'])
+
+        opts, args = getopt.do_longs([], 'abc=1', ['abc=?'], [])
+        self.assertEqual(opts, [('--abc', '1')])
+        self.assertEqual(args, [])
+
+        opts, args = getopt.do_longs([], 'abc=1', ['abcd=?'], [])
+        self.assertEqual(opts, [('--abcd', '1')])
+        self.assertEqual(args, [])
+
         opts, args = getopt.do_longs([], 'abc', ['ab', 'abc', 'abcd'], [])
         self.assertEqual(opts, [('--abc', '')])
         self.assertEqual(args, [])
@@ -95,7 +132,7 @@ def test_getopt(self):
         # note: the empty string between '-a' and '--beta' is significant:
         # it simulates an empty string option argument ('-a ""') on the
         # command line.
-        cmdline = ['-a', '1', '-b', '--alpha=2', '--beta', '-a', '3', '-a',
+        cmdline = ['-a1', '-b', '--alpha=2', '--beta', '-a', '3', '-a',
                    '', '--beta', 'arg1', 'arg2']
 
         opts, args = getopt.getopt(cmdline, 'a:b', ['alpha=', 'beta'])
@@ -106,17 +143,29 @@ def test_getopt(self):
         # accounted for in the code that calls getopt().
         self.assertEqual(args, ['arg1', 'arg2'])
 
+        cmdline = ['-a1', '--alpha=2', '--alpha=', '-a', '--alpha', 'arg1', 'arg2']
+        opts, args = getopt.getopt(cmdline, 'a::', ['alpha=?'])
+        self.assertEqual(opts, [('-a', '1'), ('--alpha', '2'), ('--alpha', ''),
+                                ('-a', ''), ('--alpha', '')])
+        self.assertEqual(args, ['arg1', 'arg2'])
+
         self.assertError(getopt.getopt, cmdline, 'a:b', ['alpha', 'beta'])
 
     def test_gnu_getopt(self):
         # Test handling of GNU style scanning mode.
-        cmdline = ['-a', 'arg1', '-b', '1', '--alpha', '--beta=2']
+        cmdline = ['-a', 'arg1', '-b', '1', '--alpha', '--beta=2', '--beta',
+                   '3', 'arg2']
 
         # GNU style
         opts, args = getopt.gnu_getopt(cmdline, 'ab:', ['alpha', 'beta='])
-        self.assertEqual(args, ['arg1'])
-        self.assertEqual(opts, [('-a', ''), ('-b', '1'),
-                                ('--alpha', ''), ('--beta', '2')])
+        self.assertEqual(args, ['arg1', 'arg2'])
+        self.assertEqual(opts, [('-a', ''), ('-b', '1'), ('--alpha', ''),
+                                ('--beta', '2'), ('--beta', '3')])
+
+        opts, args = getopt.gnu_getopt(cmdline, 'ab::', ['alpha', 'beta=?'])
+        self.assertEqual(args, ['arg1', '1', '3', 'arg2'])
+        self.assertEqual(opts, [('-a', ''), ('-b', ''), ('--alpha', ''),
+                                ('--beta', '2'), ('--beta', '')])
 
         # recognize "-" as an argument
         opts, args = getopt.gnu_getopt(['-a', '-', '-b', '-'], 'ab:', [])
@@ -126,13 +175,15 @@ def test_gnu_getopt(self):
         # Posix style via +
         opts, args = getopt.gnu_getopt(cmdline, '+ab:', ['alpha', 'beta='])
         self.assertEqual(opts, [('-a', '')])
-        self.assertEqual(args, ['arg1', '-b', '1', '--alpha', '--beta=2'])
+        self.assertEqual(args, ['arg1', '-b', '1', '--alpha', '--beta=2',
+                                '--beta', '3', 'arg2'])
 
         # Posix style via POSIXLY_CORRECT
         self.env["POSIXLY_CORRECT"] = "1"
         opts, args = getopt.gnu_getopt(cmdline, 'ab:', ['alpha', 'beta='])
         self.assertEqual(opts, [('-a', '')])
-        self.assertEqual(args, ['arg1', '-b', '1', '--alpha', '--beta=2'])
+        self.assertEqual(args, ['arg1', '-b', '1', '--alpha', '--beta=2',
+                                '--beta', '3', 'arg2'])
 
     def test_issue4629(self):
         longopts, shortopts = getopt.getopt(['--help='], '', ['help='])
diff --git a/Misc/NEWS.d/next/Library/2024-11-03-23-25-07.gh-issue-126374.Xu_THP.rst b/Misc/NEWS.d/next/Library/2024-11-03-23-25-07.gh-issue-126374.Xu_THP.rst
@@ -0,0 +1 @@
+Add support for options with optional arguments in the :mod:`getopt` module.

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Add support for options with optional arguments in the :mod:`getopt` module.