From 5583ac0c311132e36ef458842e087945898ffdec Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 18 Nov 2024 20:59:42 +0000 Subject: [PATCH 1/5] gh-127022: Simplify `PyStackRef_FromPyObjectSteal` This gets rid of the immortal check in `PyStackRef_FromPyObjectSteal()`. Overall, this improves performance about 2% in the free threading build. This also renames `PyStackRef_Is()` to `PyStackRef_IsExactly()` because the macro requires that the tag bits of the arguments match, which is only true in certain special cases. --- Include/internal/pycore_stackref.h | 13 +++++--- Python/bytecodes.c | 50 +++++++++++++++--------------- Python/executor_cases.c.h | 36 ++++++++++----------- Python/generated_cases.c.h | 48 ++++++++++++++-------------- Tools/cases_generator/analyzer.py | 5 ++- 5 files changed, 80 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 588e57f6cd97e0..e6b61dfb56d0d7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -99,8 +99,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) assert(obj != NULL); // Make sure we don't take an already tagged value. assert(((uintptr_t)obj & Py_TAG_BITS) == 0); - unsigned int tag = _Py_IsImmortal(obj) ? (Py_TAG_DEFERRED) : Py_TAG_PTR; - return ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + return (_PyStackRef){ .bits = (uintptr_t)obj }; } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) @@ -190,9 +189,15 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #endif // Py_GIL_DISABLED -// Note: this is a macro because MSVC (Windows) has trouble inlining it. +// Check if a stackref is exactly the same as another stackref, including the +// the deferred bit. This can only be used safely if you know that the deferred +// of `a` and `b` bits match. +#define PyStackRef_IsExactly(a, b) ((a).bits == (b).bits) -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) +// Checks that mask out the deferred bit in the free threading build. +#define PyStackRef_IsNone(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_None) +#define PyStackRef_IsTrue(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_True) +#define PyStackRef_IsFalse(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_False) // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c85b49842daf44..25a4c2bcd4cffe 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -376,7 +376,7 @@ dummy_func( pure inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsExactly(value, PyStackRef_False) ? PyStackRef_True : PyStackRef_False; DEAD(value); } @@ -441,7 +441,7 @@ dummy_func( inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) { // This one is a bit weird, because we expect *some* failures: - EXIT_IF(!PyStackRef_Is(value, PyStackRef_None)); + EXIT_IF(!PyStackRef_IsNone(value)); DEAD(value); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; @@ -651,9 +651,7 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; @@ -664,7 +662,7 @@ dummy_func( next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left)); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. * @@ -1141,7 +1139,7 @@ dummy_func( gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); } else { @@ -1249,7 +1247,7 @@ dummy_func( inst(POP_EXCEPT, (exc_value -- )) { _PyErr_StackItem *exc_info = tstate->exc_info; Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); } @@ -2408,6 +2406,14 @@ dummy_func( } else { res = PyStackRef_FromPyObjectSteal(res_o); +#ifdef Py_GIL_DISABLED + // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False + // so that ops like _POP_JUMP_IF_FALSE can use the faster + // PyStackRef_IsExactly() check. + if (_Py_IsImmortal(res_o)) { + res.bits |= Py_TAG_DEFERRED; + } +#endif } } @@ -2481,13 +2487,7 @@ dummy_func( } inst(IS_OP, (left, right -- b)) { -#ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; -#else - int res = PyStackRef_Is(left, right) ^ oparg; -#endif DECREF_INPUTS(); b = res ? PyStackRef_True : PyStackRef_False; } @@ -2693,7 +2693,7 @@ dummy_func( replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsExactly(cond, PyStackRef_False); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); @@ -2701,14 +2701,14 @@ dummy_func( replaced op(_POP_JUMP_IF_TRUE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsExactly(cond, PyStackRef_True); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } op(_IS_NONE, (value -- b)) { - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; DEAD(value); } @@ -3752,7 +3752,7 @@ dummy_func( inst(EXIT_INIT_CHECK, (should_be_none -- )) { assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", Py_TYPE(PyStackRef_AsPyObjectBorrow(should_be_none))->tp_name); @@ -4712,7 +4712,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_TRUE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsExactly(cond, PyStackRef_True); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4721,7 +4721,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_FALSE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsExactly(cond, PyStackRef_False); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4729,7 +4729,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4745,7 +4745,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NOT_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4780,21 +4780,21 @@ dummy_func( ///////// Tier-2 only opcodes ///////// op (_GUARD_IS_TRUE_POP, (flag -- )) { - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_true); } op (_GUARD_IS_FALSE_POP, (flag -- )) { - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_false); } op (_GUARD_IS_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); SYNC_SP(); @@ -4804,7 +4804,7 @@ dummy_func( } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); SYNC_SP(); EXIT_IF(is_none); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2c2a09adf281a7..796d96ce0cae67 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -445,7 +445,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsExactly(value, PyStackRef_False) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; break; @@ -519,7 +519,7 @@ _PyStackRef res; value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - if (!PyStackRef_Is(value, PyStackRef_None)) { + if (!PyStackRef_IsNone(value)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -822,9 +822,7 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -834,7 +832,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - if (!PyStackRef_Is(*target_local, left)) { + if (PyStackRef_AsPyObjectBorrow(*target_local) != left_o) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -1522,7 +1520,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -2978,6 +2976,14 @@ } else { res = PyStackRef_FromPyObjectSteal(res_o); + #ifdef Py_GIL_DISABLED + // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False + // so that ops like _POP_JUMP_IF_FALSE can use the faster + // PyStackRef_IsExactly() check. + if (_Py_IsImmortal(res_o)) { + res.bits |= Py_TAG_DEFERRED; + } + #endif stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); } @@ -3077,13 +3083,7 @@ oparg = CURRENT_OPARG(); right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? PyStackRef_True : PyStackRef_False; @@ -3287,7 +3287,7 @@ _PyStackRef value; _PyStackRef b; value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -4529,7 +4529,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -5610,7 +5610,7 @@ case _GUARD_IS_TRUE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_true) { @@ -5623,7 +5623,7 @@ case _GUARD_IS_FALSE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_false) { @@ -5636,7 +5636,7 @@ case _GUARD_IS_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); stack_pointer += -1; @@ -5654,7 +5654,7 @@ case _GUARD_IS_NOT_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 15308d6f1f7146..fd115d3bc517b1 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,9 +183,7 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -195,7 +193,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left), BINARY_OP); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o, BINARY_OP); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. * @@ -3251,6 +3249,14 @@ } else { res = PyStackRef_FromPyObjectSteal(res_o); + #ifdef Py_GIL_DISABLED + // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False + // so that ops like _POP_JUMP_IF_FALSE can use the faster + // PyStackRef_IsExactly() check. + if (_Py_IsImmortal(res_o)) { + res.bits |= Py_TAG_DEFERRED; + } + #endif stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); } @@ -3824,7 +3830,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -4760,7 +4766,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsExactly(cond, PyStackRef_False); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4774,7 +4780,7 @@ INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NONE); /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4796,7 +4802,7 @@ /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4819,7 +4825,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsExactly(cond, PyStackRef_True); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -5040,13 +5046,7 @@ _PyStackRef b; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? PyStackRef_True : PyStackRef_False; @@ -6647,7 +6647,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -6664,7 +6664,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsExactly(cond, PyStackRef_False); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -6684,7 +6684,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6696,7 +6696,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsExactly(cond, PyStackRef_True); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6717,7 +6717,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6729,7 +6729,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsExactly(cond, PyStackRef_False); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6747,7 +6747,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsExactly(cond, PyStackRef_True); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -7084,7 +7084,7 @@ gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7868,7 +7868,7 @@ /* Skip 2 cache entries */ value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - DEOPT_IF(!PyStackRef_Is(value, PyStackRef_None), TO_BOOL); + DEOPT_IF(!PyStackRef_IsNone(value), TO_BOOL); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; stack_pointer[-1] = res; @@ -7943,7 +7943,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsExactly(value, PyStackRef_False) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; DISPATCH(); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 96b32445fb62e2..b1a66c5f57f95c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -548,7 +548,10 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", - "PyStackRef_Is", + "PyStackRef_IsExactly", + "PyStackRef_IsNone", + "PyStackRef_IsTrue", + "PyStackRef_IsFalse", "PyStackRef_IsNull", "PyStackRef_None", "PyStackRef_TYPE", From 16f7e7b4672dfa78c89f9a528c201e06b82858d6 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 21 Nov 2024 16:08:27 +0000 Subject: [PATCH 2/5] Simplify bytecodes.c logic a bit --- Python/bytecodes.c | 12 ++---------- Python/executor_cases.c.h | 8 -------- Python/generated_cases.c.h | 14 +++----------- 3 files changed, 5 insertions(+), 29 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 25a4c2bcd4cffe..7e2a769e63f7d9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2406,14 +2406,6 @@ dummy_func( } else { res = PyStackRef_FromPyObjectSteal(res_o); -#ifdef Py_GIL_DISABLED - // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False - // so that ops like _POP_JUMP_IF_FALSE can use the faster - // PyStackRef_IsExactly() check. - if (_Py_IsImmortal(res_o)) { - res.bits |= Py_TAG_DEFERRED; - } -#endif } } @@ -2693,7 +2685,7 @@ dummy_func( replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); @@ -4721,7 +4713,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_FALSE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 796d96ce0cae67..0bd96372ef32e9 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2976,14 +2976,6 @@ } else { res = PyStackRef_FromPyObjectSteal(res_o); - #ifdef Py_GIL_DISABLED - // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False - // so that ops like _POP_JUMP_IF_FALSE can use the faster - // PyStackRef_IsExactly() check. - if (_Py_IsImmortal(res_o)) { - res.bits |= Py_TAG_DEFERRED; - } - #endif stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fd115d3bc517b1..5abd0f237da330 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3249,14 +3249,6 @@ } else { res = PyStackRef_FromPyObjectSteal(res_o); - #ifdef Py_GIL_DISABLED - // Ensure that Py_TAG_DEFERRED is set for Py_True and Py_False - // so that ops like _POP_JUMP_IF_FALSE can use the faster - // PyStackRef_IsExactly() check. - if (_Py_IsImmortal(res_o)) { - res.bits |= Py_TAG_DEFERRED; - } - #endif stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); } @@ -4766,7 +4758,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -6664,7 +6656,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -6729,7 +6721,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } From 3631451d00a3ecf9555ff896100ebb5cc210d65e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 21 Nov 2024 14:24:36 -0500 Subject: [PATCH 3/5] Update Include/internal/pycore_stackref.h Co-authored-by: Pieter Eendebak --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index e6b61dfb56d0d7..e59d41eb04631f 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -191,7 +191,7 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; // Check if a stackref is exactly the same as another stackref, including the // the deferred bit. This can only be used safely if you know that the deferred -// of `a` and `b` bits match. +// bits of `a` and `b` match. #define PyStackRef_IsExactly(a, b) ((a).bits == (b).bits) // Checks that mask out the deferred bit in the free threading build. From 06ab2ecd346ed6e6e491ec4453b698cd5e7e11f2 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 21 Nov 2024 20:23:56 +0000 Subject: [PATCH 4/5] Add assert to PyStackRef_IsExactly --- Include/internal/pycore_stackref.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index e59d41eb04631f..90a3118352f7ae 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -192,7 +192,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; // Check if a stackref is exactly the same as another stackref, including the // the deferred bit. This can only be used safely if you know that the deferred // bits of `a` and `b` match. -#define PyStackRef_IsExactly(a, b) ((a).bits == (b).bits) +#define PyStackRef_IsExactly(a, b) \ + (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) // Checks that mask out the deferred bit in the free threading build. #define PyStackRef_IsNone(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_None) From a9e487204eeb88da0e3630c36263757aee0754c1 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 22 Nov 2024 14:19:30 +0000 Subject: [PATCH 5/5] Use PyStackRef_IsFalse and PyStackRef_IsTrue --- Python/bytecodes.c | 6 +++--- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7e2a769e63f7d9..3ab4eb0ff64625 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -376,7 +376,7 @@ dummy_func( pure inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_IsExactly(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; DEAD(value); } @@ -2693,7 +2693,7 @@ dummy_func( replaced op(_POP_JUMP_IF_TRUE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); @@ -4704,7 +4704,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_TRUE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0bd96372ef32e9..05fb14af2fc63b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -445,7 +445,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_IsExactly(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5abd0f237da330..3a1baea550adfb 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4817,7 +4817,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -6688,7 +6688,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6739,7 +6739,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_IsExactly(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -7935,7 +7935,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_IsExactly(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; DISPATCH();