Skip to content

Commit eaafc78

Browse files
[3.13] gh-126997: Fix support of non-ASCII strings in pickletools (GH-127062) (GH-127094)
* Fix support of STRING and GLOBAL opcodes with non-ASCII arguments.
* dis() now outputs non-ASCII bytes in STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (\xXX).

(cherry picked from commit eaf2171)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 746a0c5 commit eaafc78

File tree

3 files changed: 92 additions (+92) and 4 deletions (-4).

Lib/pickletools.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -312,7 +312,7 @@ def read_uint8(f):
312312
doc="Eight-byte unsigned integer, little-endian.")
313313

314314

315-
def read_stringnl(f, decode=True, stripquotes=True):
315+
def read_stringnl(f, decode=True, stripquotes=True, *, encoding='latin-1'):
316316
r"""
317317
>>> import io
318318
>>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
@@ -356,7 +356,7 @@ def read_stringnl(f, decode=True, stripquotes=True):
356356
raise ValueError("no string quotes around %r" % data)
357357

358358
if decode:
359-
data = codecs.escape_decode(data)[0].decode("ascii")
359+
data = codecs.escape_decode(data)[0].decode(encoding)
360360
return data
361361

362362
stringnl = ArgumentDescriptor(
@@ -370,7 +370,7 @@ def read_stringnl(f, decode=True, stripquotes=True):
370370
""")
371371

372372
def read_stringnl_noescape(f):
373-
return read_stringnl(f, stripquotes=False)
373+
return read_stringnl(f, stripquotes=False, encoding='utf-8')
374374

375375
stringnl_noescape = ArgumentDescriptor(
376376
name='stringnl_noescape',
@@ -2513,7 +2513,10 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
25132513
# make a mild effort to align arguments
25142514
line += ' ' * (10 - len(opcode.name))
25152515
if arg is not None:
2516-
line += ' ' + repr(arg)
2516+
if opcode.name in ("STRING", "BINSTRING", "SHORT_BINSTRING"):
2517+
line += ' ' + ascii(arg)
2518+
else:
2519+
line += ' ' + repr(arg)
25172520
if markmsg:
25182521
line += ' ' + markmsg
25192522
if annotate:

Lib/test/test_pickletools.py

+82
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,88 @@ def test_annotate(self):
371371
highest protocol among opcodes = 0
372372
''', annotate=20)
373373

374+
def test_string(self):
375+
self.check_dis(b"S'abc'\n.", '''\
376+
0: S STRING 'abc'
377+
7: . STOP
378+
highest protocol among opcodes = 0
379+
''')
380+
self.check_dis(b'S"abc"\n.', '''\
381+
0: S STRING 'abc'
382+
7: . STOP
383+
highest protocol among opcodes = 0
384+
''')
385+
self.check_dis(b"S'\xc3\xb5'\n.", '''\
386+
0: S STRING '\\xc3\\xb5'
387+
6: . STOP
388+
highest protocol among opcodes = 0
389+
''')
390+
391+
def test_string_without_quotes(self):
392+
self.check_dis_error(b"Sabc'\n.", '',
393+
'no string quotes around b"abc\'"')
394+
self.check_dis_error(b'Sabc"\n.', '',
395+
"no string quotes around b'abc\"'")
396+
self.check_dis_error(b"S'abc\n.", '',
397+
'''strinq quote b"'" not found at both ends of b"'abc"''')
398+
self.check_dis_error(b'S"abc\n.', '',
399+
r"""strinq quote b'"' not found at both ends of b'"abc'""")
400+
self.check_dis_error(b"S'abc\"\n.", '',
401+
r"""strinq quote b"'" not found at both ends of b'\\'abc"'""")
402+
self.check_dis_error(b"S\"abc'\n.", '',
403+
r"""strinq quote b'"' not found at both ends of b'"abc\\''""")
404+
405+
def test_binstring(self):
406+
self.check_dis(b"T\x03\x00\x00\x00abc.", '''\
407+
0: T BINSTRING 'abc'
408+
8: . STOP
409+
highest protocol among opcodes = 1
410+
''')
411+
self.check_dis(b"T\x02\x00\x00\x00\xc3\xb5.", '''\
412+
0: T BINSTRING '\\xc3\\xb5'
413+
7: . STOP
414+
highest protocol among opcodes = 1
415+
''')
416+
417+
def test_short_binstring(self):
418+
self.check_dis(b"U\x03abc.", '''\
419+
0: U SHORT_BINSTRING 'abc'
420+
5: . STOP
421+
highest protocol among opcodes = 1
422+
''')
423+
self.check_dis(b"U\x02\xc3\xb5.", '''\
424+
0: U SHORT_BINSTRING '\\xc3\\xb5'
425+
4: . STOP
426+
highest protocol among opcodes = 1
427+
''')
428+
429+
def test_global(self):
430+
self.check_dis(b"cmodule\nname\n.", '''\
431+
0: c GLOBAL 'module name'
432+
13: . STOP
433+
highest protocol among opcodes = 0
434+
''')
435+
self.check_dis(b"cm\xc3\xb6dule\nn\xc3\xa4me\n.", '''\
436+
0: c GLOBAL 'm\xf6dule n\xe4me'
437+
15: . STOP
438+
highest protocol among opcodes = 0
439+
''')
440+
441+
def test_inst(self):
442+
self.check_dis(b"(imodule\nname\n.", '''\
443+
0: ( MARK
444+
1: i INST 'module name' (MARK at 0)
445+
14: . STOP
446+
highest protocol among opcodes = 0
447+
''')
448+
449+
def test_persid(self):
450+
self.check_dis(b"Pabc\n.", '''\
451+
0: P PERSID 'abc'
452+
5: . STOP
453+
highest protocol among opcodes = 0
454+
''')
455+
374456

375457
class MiscTestCase(unittest.TestCase):
376458
def test__all__(self):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix support of STRING and GLOBAL opcodes with non-ASCII arguments in
2+
:mod:`pickletools`. :func:`pickletools.dis` now outputs non-ASCII bytes in
3+
STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (``\xXX``).

0 commit comments

Comments
 (0)