Skip to content

Commit a8435e9

Browse files
committed
feat: replay changes from @otetard
andreasvc#51 - thank you!
1 parent cb37620 commit a8435e9

File tree

9 files changed

+58
-81
lines changed

9 files changed

+58
-81
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ requires = [
55
"Cython>=0.20",
66
"pybind11>=2.12",
77
"ninja; sys_platform != 'Windows'",
8-
"cmake>=3.18",
8+
"cmake (>=3.18,<4.0.0)",
99
]
1010

1111
build-backend = "setuptools.build_meta"

src/compile.pxi

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,13 @@ def _compile(object pattern, int flags=0, int max_mem=8388608):
4242
return fallback(original_pattern, flags, "re.LOCALE not supported")
4343
pattern = unicode_to_bytes(pattern, &encoded, -1)
4444
newflags = flags
45-
if not PY2:
46-
if not encoded and flags & _U: # re.UNICODE
47-
pass # can use UNICODE with bytes pattern, but assumes valid UTF-8
48-
# raise ValueError("can't use UNICODE flag with a bytes pattern")
49-
elif encoded and not (flags & ASCII): # re.ASCII (not in Python 2)
50-
newflags = flags | _U # re.UNICODE
51-
elif encoded and flags & ASCII:
52-
newflags = flags & ~_U # re.UNICODE
45+
if not encoded and flags & _U: # re.UNICODE
46+
pass # can use UNICODE with bytes pattern, but assumes valid UTF-8
47+
# raise ValueError("can't use UNICODE flag with a bytes pattern")
48+
elif encoded and not (flags & ASCII): # re.ASCII (not in Python 2)
49+
newflags = flags | _U # re.UNICODE
50+
elif encoded and flags & ASCII:
51+
newflags = flags & ~_U # re.UNICODE
5352
try:
5453
pattern = _prepare_pattern(pattern, newflags)
5554
except BackreferencesException:

src/includes.pxi

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@ cdef extern from *:
1212
cdef void emit_endif "#endif //" ()
1313

1414

15-
cdef extern from "Python.h":
16-
int PyObject_CheckReadBuffer(object)
17-
int PyObject_AsReadBuffer(object, const void **, Py_ssize_t *)
18-
19-
2015
cdef extern from "re2/stringpiece.h" namespace "re2":
2116
cdef cppclass StringPiece:
2217
StringPiece()

src/match.pxi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,12 @@ cdef class Match:
115115
"""Expand a template with groups."""
116116
cdef bytearray result = bytearray()
117117
if isinstance(template, unicode):
118-
if not PY2 and not self.encoded:
118+
if not self.encoded:
119119
raise ValueError(
120120
'cannot expand unicode template on bytes pattern')
121121
templ = template.encode('utf8')
122122
else:
123-
if not PY2 and self.encoded:
123+
if self.encoded:
124124
raise ValueError(
125125
'cannot expand bytes template on unicode pattern')
126126
templ = bytes(template)

src/pattern.pxi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ cdef class Pattern:
417417
if not repl_encoded and not isinstance(repl, bytes):
418418
repl_b = bytes(repl) # coerce buffer to bytes object
419419

420-
if count > 1 or (b'\\' if PY2 else <char>b'\\') in repl_b:
420+
if count > 1 or <char>b'\\' in repl_b:
421421
# Limit on number of substitutions or replacement string contains
422422
# escape sequences; handle with Match.expand() implementation.
423423
# RE2 does support simple numeric group references \1, \2,

src/re2.pyx

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ VERSION_HEX = 0x000217
151151

152152
cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L
153153
cdef int current_notification = FALLBACK_QUIETLY
154-
cdef bint PY2 = PY_MAJOR_VERSION == 2
155154

156155
# Type of compiled re object from Python stdlib
157156
SREPattern = type(re.compile(''))
@@ -252,7 +251,7 @@ def escape(pattern):
252251
"""Escape all non-alphanumeric characters in pattern."""
253252
cdef bint uni = isinstance(pattern, unicode)
254253
cdef list s
255-
if PY2 or uni:
254+
if uni:
256255
s = list(pattern)
257256
else:
258257
s = [bytes([c]) for c in pattern]
@@ -350,9 +349,9 @@ cdef inline unicode_to_bytes(object pystring, int * encoded,
350349
encoded[0] = 1 if origlen == len(pystring) else 2
351350
else:
352351
encoded[0] = 0
353-
if not PY2 and checkotherencoding > 0 and not encoded[0]:
352+
if checkotherencoding > 0 and not encoded[0]:
354353
raise TypeError("can't use a string pattern on a bytes-like object")
355-
elif not PY2 and checkotherencoding == 0 and encoded[0]:
354+
elif checkotherencoding == 0 and encoded[0]:
356355
raise TypeError("can't use a bytes pattern on a string-like object")
357356
return pystring
358357

@@ -366,14 +365,7 @@ cdef inline int pystring_to_cstring(
366365
cdef int result = -1
367366
cstring[0] = NULL
368367
size[0] = 0
369-
if PY2:
370-
# Although the new-style buffer interface was backported to Python 2.6,
371-
# some modules, notably mmap, only support the old buffer interface.
372-
# Cf. http://bugs.python.org/issue9229
373-
if PyObject_CheckReadBuffer(pystring) == 1:
374-
result = PyObject_AsReadBuffer(
375-
pystring, <const void **>cstring, size)
376-
elif PyObject_CheckBuffer(pystring) == 1: # new-style Buffer interface
368+
if PyObject_CheckBuffer(pystring) == 1: # new-style Buffer interface
377369
result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE)
378370
if result == 0:
379371
cstring[0] = <char *>buf.buf
@@ -383,8 +375,7 @@ cdef inline int pystring_to_cstring(
383375

384376
cdef inline void release_cstring(Py_buffer *buf):
385377
"""Release buffer if necessary."""
386-
if not PY2:
387-
PyBuffer_Release(buf)
378+
PyBuffer_Release(buf)
388379

389380

390381
cdef utf8indices(char * cstring, Py_ssize_t size, Py_ssize_t *pos, Py_ssize_t *endpos):

tests/test_charliterals.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import re2 as re
2+
import warnings
3+
4+
warnings.filterwarnings('ignore', category=DeprecationWarning)
5+
6+
import unittest
7+
8+
class TestCharLiterals(unittest.TestCase):
    """Check octal and hexadecimal character escapes in patterns.

    Each case formats an escape for code point 126 into a pattern and
    matches it against ``chr(126)``, optionally followed by one extra
    literal character that is also appended to the pattern.
    """

    def _assert_all_match(self, cases, code):
        """Assert that every ``(template, suffix)`` pair in *cases* matches.

        *template* is %-formatted with *code* to build the pattern; the
        subject string is ``chr(code) + suffix``.
        """
        for template, suffix in cases:
            with self.subTest(template=template):
                self.assertIsNotNone(
                    re.match(template % code, chr(code) + suffix))

    def test_character_literals(self):
        """Escapes outside a character class match the escaped character."""
        i = 126

        self.assertEqual(re.compile(r"\%03o" % i), re.compile('\\176'))
        self.assertEqual(re.compile(r"\%03o" % i)._dump_pattern(), '\\176')
        self._assert_all_match(
            (
                (r"\%03o", ""),
                (r"\%03o0", "0"),
                (r"\%03o8", "8"),
                (r"\x%02x", ""),
                (r"\x%02x0", "0"),
                (r"\x%02xz", "z"),
            ),
            i,
        )

        # Raw string: ``\9`` is not a valid Python escape, so the non-raw
        # spelling triggers an invalid-escape warning at compile time.
        # NOTE(review): nothing fails here if re.match() does not raise;
        # confirm whether an exception is required and assert it if so.
        try:
            re.match(r"\911", "")
        except Exception as exp:
            self.assertEqual(
                exp.msg, "invalid group reference 91 at position 1")

    def test_character_class_literals(self):
        """Escapes inside a character class match the escaped character."""
        i = 126

        self._assert_all_match(
            (
                (r"[\%03o]", ""),
                (r"[\%03o0]", "0"),
                (r"[\%03o8]", "8"),
                (r"[\x%02x]", ""),
                (r"[\x%02x0]", "0"),
                (r"[\x%02xz]", "z"),
            ),
            i,
        )

        # Raw strings keep the same text as the non-raw originals while
        # avoiding the invalid ``\9`` escape in Python source.
        # NOTE(review): nothing fails here if re.match() does not raise;
        # confirm whether an exception is required and assert it if so.
        try:
            re.match(r"[\911]", "")
        except Exception as exp:
            self.assertEqual(exp.msg, r"invalid escape sequence: \9")

tests/test_charliterals.txt

Lines changed: 0 additions & 47 deletions
This file was deleted.

tox.ini

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
[tox]
2-
envlist = py3{7,8,9,10,11,12,13}
2+
envlist = py3{8,9,10,11,12,13}
33
skip_missing_interpreters = true
44
isolated_build = true
55
skipsdist=True
66

77
[gh-actions]
88
python =
9-
3.7: py37
109
3.8: py38
1110
3.9: py39
1211
3.10: py310
@@ -167,7 +166,7 @@ deps =
167166
pip>=20.0.1
168167

169168
commands =
170-
pip install pyre2-updated --force-reinstall --prefer-binary -f dist/
169+
pip install pyre2-updated --force-reinstall --prefer-binary --no-index -f dist/
171170
python -m unittest discover -f -s .
172171

173172
[testenv:style]

0 commit comments

Comments
 (0)