Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ struct _typeobject {
* by code other than the specializer and interpreter. */
struct _specialization_cache {
/* NOTE(review): presumably the cached __getitem__ used by a subscript
 * specialization; its use is not visible in this change -- confirm. */
PyObject *getitem;
/* The type's __init__ function. Written by the class-call specializer
 * (specialize_class_call) and read back by PRECALL_PY_CLASS, which
 * takes its own reference (Py_INCREF) before placing it on the stack.
 * NOTE(review): stored without an INCREF, so presumably a borrowed
 * reference kept valid by the tp_version_tag check -- confirm. */
PyObject *init;
};

/* The *real* layout of a type object when allocated on the heap */
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ typedef struct _PyInterpreterFrame {
PyFrameState f_state; /* What state the frame is in */
bool is_entry; // Whether this is the "root" frame for the current _PyCFrame.
bool is_generator;
PyObject *self; /* Strong reference (or NULL) held by __init__ frames so that RETURN_VALUE can return self in place of None */
PyObject *localsplus[1];
} _PyInterpreterFrame;

Expand Down Expand Up @@ -118,6 +119,7 @@ _PyFrame_InitializeSpecials(
frame->f_state = FRAME_CREATED;
frame->is_entry = false;
frame->is_generator = false;
frame->self = NULL;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: self is tied to the frame state rather than to a cache or call_shape, so that subsequent nested calls don't destroy self (and so that we can identify which frame self belongs to). Consider the following code:

```python
class Tokenizer:
    def __init__(self):
        self.__next()  # Kaboom!

    def __next(self):
        pass

for _ in range(10):
    print(Tokenizer())
```
}

/* Gets the pointer to the locals array
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_typeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ extern PyStatus _PyTypes_InitSlotDefs(void);

extern void _PyStaticType_Dealloc(PyTypeObject *type);

extern PyObject *_PyObject_New_Vector(PyTypeObject *type,
PyObject *const *args, Py_ssize_t nargs, PyObject *kwds);

#ifdef __cplusplus
}
Expand Down
29 changes: 15 additions & 14 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ def jabs_op(name, op, entries=0):
"PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST",
"PRECALL_BOUND_METHOD",
"PRECALL_PYFUNC",
"PRECALL_PY_CLASS",
"RESUME_QUICK",
"STORE_ATTR_ADAPTIVE",
"STORE_ATTR_INSTANCE_VALUE",
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ class C(object): pass
def func():
return sys._getframe()
x = func()
check(x, size('3Pi3c7P2ic??2P'))
check(x, size('3Pi3c7P2ic??3P'))
# function
def func(): pass
check(func, size('14Pi'))
Expand All @@ -1414,7 +1414,7 @@ def bar(cls):
check(bar, size('PP'))
# generator
def get_gen(): yield 1
check(get_gen(), size('P2P4P4c7P2ic??P'))
check(get_gen(), size('P2P4P4c7P2ic??2P'))
# iterator
check(iter('abc'), size('lP'))
# callable-iterator
Expand Down Expand Up @@ -1506,7 +1506,7 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1P' # Specializer cache
'2P' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Calls to Python classes are now specialized. Creating objects from Python
classes should now be faster. Patch by Ken Jin.
10 changes: 9 additions & 1 deletion Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -4514,7 +4514,15 @@ object_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
if (excess_args(args, kwds)) {
return _PyObject_New_Vector(type, (_PyTuple_CAST(args)->ob_item),
PyTuple_GET_SIZE(args), kwds);
}

PyObject *
_PyObject_New_Vector(PyTypeObject *type, PyObject *const *args,
Py_ssize_t nargs, PyObject *kwds)
{
if (nargs || (kwds && PyDict_Check(kwds) && PyDict_GET_SIZE(kwds))) {
if (type->tp_new != object_new) {
PyErr_SetString(PyExc_TypeError,
"object.__new__() takes exactly one argument (the type to instantiate)");
Expand Down
69 changes: 69 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,11 @@ pop_frame(PyThreadState *tstate, _PyInterpreterFrame *frame)
*/
typedef struct {
/* Borrowed reference to the keyword names for the pending call, or NULL.
 * Reset by the CALL instructions. */
PyObject *kwnames;
/* __init__ is special: it returns None, yet the call as a whole must
 * evaluate to self. When this flag is set (by PRECALL_PY_CLASS), CALL
 * hands the current self to the newly pushed __init__ frame and clears
 * the flag; the frame's self is eventually consumed by RETURN_VALUE.
 */
bool init_pass_self;
} CallShape;

static inline bool
Expand Down Expand Up @@ -1618,6 +1623,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyCFrame cframe;
CallShape call_shape;
call_shape.kwnames = NULL; // Borrowed reference. Reset by CALL instructions.
call_shape.init_pass_self = 0;

/* WARNING: Because the _PyCFrame lives on the C stack,
* but can be accessed from a heap allocated object (tstate)
Expand Down Expand Up @@ -2386,6 +2392,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(RETURN_VALUE) {
PyObject *retval = POP();
if (frame->self != NULL) {
if (Py_IsNone(retval)) {
Py_SETREF(retval, frame->self);
frame->self = NULL;
}
/* We need this to continue raising errors when bad-practice
__init__s return their non-None values. This is later
caught by the interpreter. */
else {
Py_CLEAR(frame->self);
}
}
assert(EMPTY());
frame->f_state = FRAME_RETURNED;
_PyFrame_SetStackPointer(frame, stack_pointer);
Expand Down Expand Up @@ -4617,6 +4635,40 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DISPATCH();
}

TARGET(PRECALL_PY_CLASS) {
SpecializedCacheEntry *cache = GET_CACHE();
_PyAdaptiveEntry *cache0 = &cache[0].adaptive;
_PyCallCache *cache1 = &cache[-1].call;
int original_oparg = cache->adaptive.original_oparg;
int is_method = (PEEK(original_oparg + 2) != NULL);
DEOPT_IF(is_method, PRECALL);
PyObject *cls = PEEK(original_oparg + 1);
DEOPT_IF(!PyType_Check(cls), PRECALL);
PyTypeObject *cls_t = (PyTypeObject *)cls;
DEOPT_IF(cls_t->tp_version_tag != cache0->version, PRECALL);
assert(cls_t->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *init = ((PyHeapTypeObject *)cls_t)->_spec_cache.init;
assert(PyFunction_Check(init));
DEOPT_IF(((PyFunctionObject *)(init))->func_version != cache1->func_version, PRECALL);
DEOPT_IF(cls_t->tp_new != PyBaseObject_Type.tp_new, PRECALL);
STAT_INC(PRECALL, hit);

PyObject *self = _PyObject_New_Vector(cls_t, &PEEK(original_oparg),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the specializer only specialized for classes that don't override __new__, you can avoid calling __new__ and just construct the object directly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_PyObject_New_Vector does some argument checking. https://github.com/python/cpython/pull/31707/files#diff-1decebeef15f4e0b0ce106c665751ec55068d4d1d1825847925ad4f528b5b872R4525

Come to think of it, if we verify that the argcount is right at specialization time, do we need to re-verify at runtime? Would it be safe to call tp_alloc directly? It seems that the only thing that could change is the kwds dict, but even then that's only used for argument count checking again.

(Py_ssize_t)original_oparg, call_shape.kwnames);
if (self == NULL) {
goto error;
}
Py_INCREF(init);
PEEK(original_oparg+1) = self;
PEEK(original_oparg+2) = init;
Py_DECREF(cls);

/* For use in RETURN_VALUE later */
assert(call_shape.init_pass_self == false);
call_shape.init_pass_self = true;
DISPATCH();
}

TARGET(KW_NAMES) {
assert(call_shape.kwnames == NULL);
assert(oparg < PyTuple_GET_SIZE(consts));
Expand Down Expand Up @@ -4651,6 +4703,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
CALL_STAT_INC(inlined_py_calls);
goto start_frame;
}
Expand Down Expand Up @@ -4762,6 +4819,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
goto start_frame;
}

Expand Down Expand Up @@ -4803,6 +4865,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
if (call_shape.init_pass_self) {
assert(frame->self == NULL);
frame->self = Py_NewRef(frame->localsplus[0]);
call_shape.init_pass_self = false;
}
goto start_frame;
}

Expand Down Expand Up @@ -5617,6 +5684,7 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)

error:
call_shape.kwnames = NULL;
call_shape.init_pass_self = false;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: We don't set frame->self = NULL here, because doing so would make any exception raised inside __init__ destroy self, even one that __init__ itself catches. For example, consider this:

```python
class A:
    def __init__(self):
        try:
            A.a  # Kaboom!
        except AttributeError:
            pass

for _ in range(10):
    print(A())
```
/* Double-check exception status. */
#ifdef NDEBUG
if (!_PyErr_Occurred(tstate)) {
Expand Down Expand Up @@ -5658,6 +5726,7 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
assert(STACK_LEVEL() == 0);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_state = FRAME_RAISED;
Py_CLEAR(frame->self);
TRACE_FUNCTION_UNWIND();
DTRACE_FUNCTION_EXIT();
goto exit_unwind;
Expand Down
14 changes: 7 additions & 7 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 37 additions & 10 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,15 +591,16 @@ initial_counter_value(void) {
#define SPEC_FAIL_CALL_BAD_CALL_FLAGS 17
#define SPEC_FAIL_CALL_CLASS 18
#define SPEC_FAIL_CALL_PYTHON_CLASS 19
#define SPEC_FAIL_CALL_METHOD_DESCRIPTOR 20
#define SPEC_FAIL_CALL_BOUND_METHOD 21
#define SPEC_FAIL_CALL_STR 22
#define SPEC_FAIL_CALL_CLASS_NO_VECTORCALL 23
#define SPEC_FAIL_CALL_CLASS_MUTABLE 24
#define SPEC_FAIL_CALL_KWNAMES 25
#define SPEC_FAIL_CALL_METHOD_WRAPPER 26
#define SPEC_FAIL_CALL_OPERATOR_WRAPPER 27
#define SPEC_FAIL_CALL_PYFUNCTION 28
#define SPEC_FAIL_CALL_PYTHON_CLASS_NON_PY_INIT 20
#define SPEC_FAIL_CALL_METHOD_DESCRIPTOR 21
#define SPEC_FAIL_CALL_BOUND_METHOD 22
#define SPEC_FAIL_CALL_STR 23
#define SPEC_FAIL_CALL_CLASS_NO_VECTORCALL 24
#define SPEC_FAIL_CALL_CLASS_MUTABLE 25
#define SPEC_FAIL_CALL_KWNAMES 26
#define SPEC_FAIL_CALL_METHOD_WRAPPER 27
#define SPEC_FAIL_CALL_OPERATOR_WRAPPER 28
#define SPEC_FAIL_CALL_PYFUNCTION 29

/* COMPARE_OP */
#define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12
Expand Down Expand Up @@ -1523,7 +1524,33 @@ specialize_class_call(
assert(_Py_OPCODE(*instr) == PRECALL_ADAPTIVE);
PyTypeObject *tp = _PyType_CAST(callable);
if (tp->tp_new == PyBaseObject_Type.tp_new) {
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_CALL_PYTHON_CLASS);
_PyAdaptiveEntry *cache0 = &cache[0].adaptive;
_PyCallCache *cache1 = &cache[-1].call;
PyObject *descriptor = _PyType_Lookup(tp, &_Py_ID(__init__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
return -1;
}
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
int kind = function_kind(fcode);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(PRECALL, kind);
return -1;
}
assert(tp->tp_version_tag != 0);
cache0->version = tp->tp_version_tag;
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
}
cache1->func_version = version;
((PyHeapTypeObject *)tp)->_spec_cache.init = descriptor;
*instr = _Py_MAKECODEUNIT(PRECALL_PY_CLASS, _Py_OPARG(*instr));
return 0;
}
SPECIALIZATION_FAIL(PRECALL, SPEC_FAIL_CALL_PYTHON_CLASS_NON_PY_INIT);
return -1;
}
if (tp->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) {
Expand Down