Skip to content

Commit dbf9cff

Browse files
authored
bpo-32929: Dataclasses: Change the tri-state hash parameter to the boolean unsafe_hash. (#5891)
unsafe_hash=False is now the default. It is the same behavior as the old hash=None parameter. unsafe_hash=True will try to add __hash__. If it already exists, TypeError is raised.
1 parent 9c17e3a commit dbf9cff

File tree

3 files changed

+235
-195
lines changed

3 files changed

+235
-195
lines changed

Lib/dataclasses.py

Lines changed: 105 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020

2121
# Conditions for adding methods. The boxes indicate what action the
2222
# dataclass decorator takes. For all of these tables, when I talk
23-
# about init=, repr=, eq=, order=, hash=, or frozen=, I'm referring
24-
# to the arguments to the @dataclass decorator. When checking if a
25-
# dunder method already exists, I mean check for an entry in the
26-
# class's __dict__. I never check to see if an attribute is defined
27-
# in a base class.
23+
# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
24+
# referring to the arguments to the @dataclass decorator. When
25+
# checking if a dunder method already exists, I mean check for an
26+
# entry in the class's __dict__. I never check to see if an
27+
# attribute is defined in a base class.
2828

2929
# Key:
3030
# +=========+=========================================+
@@ -34,11 +34,6 @@
3434
# +---------+-----------------------------------------+
3535
# | add | Generated method is added. |
3636
# +---------+-----------------------------------------+
37-
# | add* | Generated method is added only if the |
38-
# | | existing attribute is None and if the |
39-
# | | user supplied a __eq__ method in the |
40-
# | | class definition. |
41-
# +---------+-----------------------------------------+
4237
# | raise | TypeError is raised. |
4338
# +---------+-----------------------------------------+
4439
# | None | Attribute is set to None. |
@@ -115,43 +110,36 @@
115110

116111
# __hash__
117112

118-
# +------------------- hash= parameter
119-
# | +----------- eq= parameter
120-
# | | +--- frozen= parameter
121-
# | | |
122-
# v v v | | |
123-
# | no | yes | <--- class has __hash__ in __dict__?
124-
# +=========+=======+=======+========+========+
125-
# | 1 None | False | False | | | No __eq__, use the base class __hash__
126-
# +---------+-------+-------+--------+--------+
127-
# | 2 None | False | True | | | No __eq__, use the base class __hash__
128-
# +---------+-------+-------+--------+--------+
129-
# | 3 None | True | False | None | | <-- the default, not hashable
130-
# +---------+-------+-------+--------+--------+
131-
# | 4 None | True | True | add | add* | Frozen, so hashable
132-
# +---------+-------+-------+--------+--------+
133-
# | 5 False | False | False | | |
134-
# +---------+-------+-------+--------+--------+
135-
# | 6 False | False | True | | |
136-
# +---------+-------+-------+--------+--------+
137-
# | 7 False | True | False | | |
138-
# +---------+-------+-------+--------+--------+
139-
# | 8 False | True | True | | |
140-
# +---------+-------+-------+--------+--------+
141-
# | 9 True | False | False | add | add* | Has no __eq__, but hashable
142-
# +---------+-------+-------+--------+--------+
143-
# |10 True | False | True | add | add* | Has no __eq__, but hashable
144-
# +---------+-------+-------+--------+--------+
145-
# |11 True | True | False | add | add* | Not frozen, but hashable
146-
# +---------+-------+-------+--------+--------+
147-
# |12 True | True | True | add | add* | Frozen, so hashable
148-
# +=========+=======+=======+========+========+
113+
# +------------------- unsafe_hash= parameter
114+
# | +----------- eq= parameter
115+
# | | +--- frozen= parameter
116+
# | | |
117+
# v v v | | |
118+
# | no | yes | <--- class has explicitly defined __hash__
119+
# +=======+=======+=======+========+========+
120+
# | False | False | False | | | No __eq__, use the base class __hash__
121+
# +-------+-------+-------+--------+--------+
122+
# | False | False | True | | | No __eq__, use the base class __hash__
123+
# +-------+-------+-------+--------+--------+
124+
# | False | True | False | None | | <-- the default, not hashable
125+
# +-------+-------+-------+--------+--------+
126+
# | False | True | True | add | | Frozen, so hashable, allows override
127+
# +-------+-------+-------+--------+--------+
128+
# | True | False | False | add | raise | Has no __eq__, but hashable
129+
# +-------+-------+-------+--------+--------+
130+
# | True | False | True | add | raise | Has no __eq__, but hashable
131+
# +-------+-------+-------+--------+--------+
132+
# | True | True | False | add | raise | Not frozen, but hashable
133+
# +-------+-------+-------+--------+--------+
134+
# | True | True | True | add | raise | Frozen, so hashable
135+
# +=======+=======+=======+========+========+
149136
# For boxes that are blank, __hash__ is untouched and therefore
150137
# inherited from the base class. If the base is object, then
151138
# id-based hashing is used.
152139
# Note that a class may already have __hash__=None if it specified an
153140
# __eq__ method in the class body (not one that was created by
154141
# @dataclass).
142+
# See _hash_action (below) for a coded version of this table.
155143

156144

157145
# Raised when an attempt is made to modify a frozen class.
@@ -557,7 +545,45 @@ def _set_new_attribute(cls, name, value):
557545
return False
558546

559547

560-
def _process_class(cls, repr, eq, order, hash, init, frozen):
548+
# Decide if/how we're going to create a hash function. Key is
549+
# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
550+
# take.
551+
# Actions:
552+
# '': Do nothing.
553+
# 'none': Set __hash__ to None.
554+
# 'add': Always add a generated __hash__ function.
555+
# 'exception': Raise an exception.
556+
#
557+
# +-------------------------------------- unsafe_hash?
558+
# | +------------------------------- eq?
559+
# | | +------------------------ frozen?
560+
# | | | +---------------- has-explicit-hash?
561+
# | | | |
562+
# | | | | +------- action
563+
# | | | | |
564+
# v v v v v
565+
_hash_action = {(False, False, False, False): (''),
566+
(False, False, False, True ): (''),
567+
(False, False, True, False): (''),
568+
(False, False, True, True ): (''),
569+
(False, True, False, False): ('none'),
570+
(False, True, False, True ): (''),
571+
(False, True, True, False): ('add'),
572+
(False, True, True, True ): (''),
573+
(True, False, False, False): ('add'),
574+
(True, False, False, True ): ('exception'),
575+
(True, False, True, False): ('add'),
576+
(True, False, True, True ): ('exception'),
577+
(True, True, False, False): ('add'),
578+
(True, True, False, True ): ('exception'),
579+
(True, True, True, False): ('add'),
580+
(True, True, True, True ): ('exception'),
581+
}
582+
# See https://bugs.python.org/issue32929#msg312829 for an if-statement
583+
# version of this table.
584+
585+
586+
def _process_class(cls, repr, eq, order, unsafe_hash, init, frozen):
561587
# Now that dicts retain insertion order, there's no reason to use
562588
# an ordered dict. I am leveraging that ordering here, because
563589
# derived class fields overwrite base class fields, but the order
@@ -605,8 +631,14 @@ def _process_class(cls, repr, eq, order, hash, init, frozen):
605631
# be inherited down.
606632
is_frozen = frozen or cls.__setattr__ is _frozen_setattr
607633

608-
# Was this class defined with an __eq__? Used in __hash__ logic.
609-
auto_hash_test= '__eq__' in cls.__dict__ and getattr(cls.__dict__, '__hash__', MISSING) is None
634+
# Was this class defined with an explicit __hash__? Note that if
635+
# __eq__ is defined in this class, then python will automatically
636+
# set __hash__ to None. This is a heuristic, as it's possible
637+
# that such a __hash__ == None was not auto-generated, but it's
638+
# close enough.
639+
class_hash = cls.__dict__.get('__hash__', MISSING)
640+
has_explicit_hash = not (class_hash is MISSING or
641+
(class_hash is None and '__eq__' in cls.__dict__))
610642

611643
# If we're generating ordering methods, we must be generating
612644
# the eq methods.
@@ -661,48 +693,38 @@ def _process_class(cls, repr, eq, order, hash, init, frozen):
661693
if _set_new_attribute(cls, name,
662694
_cmp_fn(name, op, self_tuple, other_tuple)):
663695
raise TypeError(f'Cannot overwrite attribute {name} '
664-
f'in {cls.__name__}. Consider using '
696+
f'in class {cls.__name__}. Consider using '
665697
'functools.total_ordering')
666698

667699
if is_frozen:
668700
for name, fn in [('__setattr__', _frozen_setattr),
669701
('__delattr__', _frozen_delattr)]:
670702
if _set_new_attribute(cls, name, fn):
671703
raise TypeError(f'Cannot overwrite attribute {name} '
672-
f'in {cls.__name__}')
704+
f'in class {cls.__name__}')
673705

674706
# Decide if/how we're going to create a hash function.
675-
# TODO: Move this table to module scope, so it's not recreated
676-
# all the time.
677-
generate_hash = {(None, False, False): ('', ''),
678-
(None, False, True): ('', ''),
679-
(None, True, False): ('none', ''),
680-
(None, True, True): ('fn', 'fn-x'),
681-
(False, False, False): ('', ''),
682-
(False, False, True): ('', ''),
683-
(False, True, False): ('', ''),
684-
(False, True, True): ('', ''),
685-
(True, False, False): ('fn', 'fn-x'),
686-
(True, False, True): ('fn', 'fn-x'),
687-
(True, True, False): ('fn', 'fn-x'),
688-
(True, True, True): ('fn', 'fn-x'),
689-
}[None if hash is None else bool(hash), # Force bool() if not None.
690-
bool(eq),
691-
bool(frozen)]['__hash__' in cls.__dict__]
707+
hash_action = _hash_action[bool(unsafe_hash),
708+
bool(eq),
709+
bool(frozen),
710+
has_explicit_hash]
711+
692712
# No need to call _set_new_attribute here, since we already know if
693713
# we're overwriting a __hash__ or not.
694-
if generate_hash == '':
714+
if hash_action == '':
695715
# Do nothing.
696716
pass
697-
elif generate_hash == 'none':
717+
elif hash_action == 'none':
698718
cls.__hash__ = None
699-
elif generate_hash in ('fn', 'fn-x'):
700-
if generate_hash == 'fn' or auto_hash_test:
701-
flds = [f for f in field_list
702-
if (f.compare if f.hash is None else f.hash)]
703-
cls.__hash__ = _hash_fn(flds)
719+
elif hash_action == 'add':
720+
flds = [f for f in field_list if (f.compare if f.hash is None else f.hash)]
721+
cls.__hash__ = _hash_fn(flds)
722+
elif hash_action == 'exception':
723+
# Raise an exception.
724+
raise TypeError(f'Cannot overwrite attribute __hash__ '
725+
f'in class {cls.__name__}')
704726
else:
705-
assert False, f"can't get here: {generate_hash}"
727+
assert False, f"can't get here: {hash_action}"
706728

707729
if not getattr(cls, '__doc__'):
708730
# Create a class doc-string.
@@ -716,21 +738,21 @@ def _process_class(cls, repr, eq, order, hash, init, frozen):
716738
# underscore. The presence of _cls is used to detect if this
717739
# decorator is being called with parameters or not.
718740
def dataclass(_cls=None, *, init=True, repr=True, eq=True, order=False,
719-
hash=None, frozen=False):
741+
unsafe_hash=None, frozen=False):
720742
"""Returns the same class as was passed in, with dunder methods
721743
added based on the fields defined in the class.
722744
723745
Examines PEP 526 __annotations__ to determine fields.
724746
725747
If init is true, an __init__() method is added to the class. If
726748
repr is true, a __repr__() method is added. If order is true, rich
727-
comparison dunder methods are added. If hash is true, a __hash__()
728-
method function is added. If frozen is true, fields may not be
729-
assigned to after instance creation.
749+
comparison dunder methods are added. If unsafe_hash is true, a
750+
__hash__() method function is added. If frozen is true, fields may
751+
not be assigned to after instance creation.
730752
"""
731753

732754
def wrap(cls):
733-
return _process_class(cls, repr, eq, order, hash, init, frozen)
755+
return _process_class(cls, repr, eq, order, unsafe_hash, init, frozen)
734756

735757
# See if we're being called as @dataclass or @dataclass().
736758
if _cls is None:
@@ -793,6 +815,7 @@ class C:
793815
raise TypeError("asdict() should be called on dataclass instances")
794816
return _asdict_inner(obj, dict_factory)
795817

818+
796819
def _asdict_inner(obj, dict_factory):
797820
if _is_dataclass_instance(obj):
798821
result = []
@@ -832,6 +855,7 @@ class C:
832855
raise TypeError("astuple() should be called on dataclass instances")
833856
return _astuple_inner(obj, tuple_factory)
834857

858+
835859
def _astuple_inner(obj, tuple_factory):
836860
if _is_dataclass_instance(obj):
837861
result = []
@@ -849,7 +873,8 @@ def _astuple_inner(obj, tuple_factory):
849873

850874

851875
def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
852-
repr=True, eq=True, order=False, hash=None, frozen=False):
876+
repr=True, eq=True, order=False, unsafe_hash=None,
877+
frozen=False):
853878
"""Return a new dynamically created dataclass.
854879
855880
The dataclass name will be 'cls_name'. 'fields' is an iterable
@@ -869,7 +894,7 @@ class C(Base):
869894
870895
For the bases and namespace parameters, see the builtin type() function.
871896
872-
The parameters init, repr, eq, order, hash, and frozen are passed to
897+
The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to
873898
dataclass().
874899
"""
875900

@@ -894,7 +919,8 @@ class C(Base):
894919
namespace['__annotations__'] = anns
895920
cls = type(cls_name, bases, namespace)
896921
return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
897-
hash=hash, frozen=frozen)
922+
unsafe_hash=unsafe_hash, frozen=frozen)
923+
898924

899925
def replace(obj, **changes):
900926
"""Return a new object replacing specified fields with new values.

0 commit comments

Comments
 (0)