|
8 | 8 | from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries |
9 | 9 |
|
10 | 10 |
|
11 | | -class _DtypeOpsMixin: |
12 | | - # Not all of pandas' extension dtypes are compatibile with |
13 | | - # the new ExtensionArray interface. This means PandasExtensionDtype |
14 | | - # can't subclass ExtensionDtype yet, as is_extension_array_dtype would |
15 | | - # incorrectly say that these types are extension types. |
16 | | - # |
17 | | - # In the interim, we put methods that are shared between the two base |
18 | | - # classes ExtensionDtype and PandasExtensionDtype here. Both those base |
19 | | - # classes will inherit from this Mixin. Once everything is compatible, this |
20 | | - # class's methods can be moved to ExtensionDtype and removed. |
21 | | - |
22 | | - # na_value is the default NA value to use for this type. This is used in |
23 | | - # e.g. ExtensionArray.take. This should be the user-facing "boxed" version |
24 | | - # of the NA value, not the physical NA vaalue for storage. |
25 | | - # e.g. for JSONArray, this is an empty dictionary. |
26 | | - na_value = np.nan |
27 | | - _metadata = () # type: Tuple[str, ...] |
28 | | - |
29 | | - def __eq__(self, other): |
30 | | - """Check whether 'other' is equal to self. |
31 | | -
|
32 | | - By default, 'other' is considered equal if either |
33 | | -
|
34 | | - * it's a string matching 'self.name'. |
35 | | - * it's an instance of this type and all of the |
36 | | - the attributes in ``self._metadata`` are equal between |
37 | | - `self` and `other`. |
38 | | -
|
39 | | - Parameters |
40 | | - ---------- |
41 | | - other : Any |
42 | | -
|
43 | | - Returns |
44 | | - ------- |
45 | | - bool |
46 | | - """ |
47 | | - if isinstance(other, str): |
48 | | - try: |
49 | | - other = self.construct_from_string(other) |
50 | | - except TypeError: |
51 | | - return False |
52 | | - if isinstance(other, type(self)): |
53 | | - return all( |
54 | | - getattr(self, attr) == getattr(other, attr) |
55 | | - for attr in self._metadata |
56 | | - ) |
57 | | - return False |
58 | | - |
59 | | - def __hash__(self): |
60 | | - return hash(tuple(getattr(self, attr) for attr in self._metadata)) |
61 | | - |
62 | | - def __ne__(self, other): |
63 | | - return not self.__eq__(other) |
64 | | - |
65 | | - @property |
66 | | - def names(self) -> Optional[List[str]]: |
67 | | - """Ordered list of field names, or None if there are no fields. |
68 | | -
|
69 | | - This is for compatibility with NumPy arrays, and may be removed in the |
70 | | - future. |
71 | | - """ |
72 | | - return None |
73 | | - |
74 | | - @classmethod |
75 | | - def is_dtype(cls, dtype): |
76 | | - """Check if we match 'dtype'. |
77 | | -
|
78 | | - Parameters |
79 | | - ---------- |
80 | | - dtype : object |
81 | | - The object to check. |
82 | | -
|
83 | | - Returns |
84 | | - ------- |
85 | | - is_dtype : bool |
86 | | -
|
87 | | - Notes |
88 | | - ----- |
89 | | - The default implementation is True if |
90 | | -
|
91 | | - 1. ``cls.construct_from_string(dtype)`` is an instance |
92 | | - of ``cls``. |
93 | | - 2. ``dtype`` is an object and is an instance of ``cls`` |
94 | | - 3. ``dtype`` has a ``dtype`` attribute, and any of the above |
95 | | - conditions is true for ``dtype.dtype``. |
96 | | - """ |
97 | | - dtype = getattr(dtype, 'dtype', dtype) |
98 | | - |
99 | | - if isinstance(dtype, (ABCSeries, ABCIndexClass, |
100 | | - ABCDataFrame, np.dtype)): |
101 | | - # https://github.com/pandas-dev/pandas/issues/22960 |
102 | | - # avoid passing data to `construct_from_string`. This could |
103 | | - # cause a FutureWarning from numpy about failing elementwise |
104 | | - # comparison from, e.g., comparing DataFrame == 'category'. |
105 | | - return False |
106 | | - elif dtype is None: |
107 | | - return False |
108 | | - elif isinstance(dtype, cls): |
109 | | - return True |
110 | | - try: |
111 | | - return cls.construct_from_string(dtype) is not None |
112 | | - except TypeError: |
113 | | - return False |
114 | | - |
115 | | - @property |
116 | | - def _is_numeric(self) -> bool: |
117 | | - """ |
118 | | - Whether columns with this dtype should be considered numeric. |
119 | | -
|
120 | | - By default ExtensionDtypes are assumed to be non-numeric. |
121 | | - They'll be excluded from operations that exclude non-numeric |
122 | | - columns, like (groupby) reductions, plotting, etc. |
123 | | - """ |
124 | | - return False |
125 | | - |
126 | | - @property |
127 | | - def _is_boolean(self) -> bool: |
128 | | - """ |
129 | | - Whether this dtype should be considered boolean. |
130 | | -
|
131 | | - By default, ExtensionDtypes are assumed to be non-numeric. |
132 | | - Setting this to True will affect the behavior of several places, |
133 | | - e.g. |
134 | | -
|
135 | | - * is_bool |
136 | | - * boolean indexing |
137 | | -
|
138 | | - Returns |
139 | | - ------- |
140 | | - bool |
141 | | - """ |
142 | | - return False |
143 | | - |
144 | | - |
145 | | -class ExtensionDtype(_DtypeOpsMixin): |
| 11 | +class ExtensionDtype: |
146 | 12 | """ |
147 | 13 | A custom data type, to be paired with an ExtensionArray. |
148 | 14 |
|
@@ -202,10 +68,52 @@ class property**. |
202 | 68 | ``pandas.errors.AbstractMethodError`` and no ``register`` method is |
203 | 69 | provided for registering virtual subclasses. |
204 | 70 | """ |
| 71 | + # na_value is the default NA value to use for this type. This is used in |
| 72 | + # e.g. ExtensionArray.take. This should be the user-facing "boxed" version |
| 73 | + # of the NA value, not the physical NA value for storage. |
| 74 | + # e.g. for JSONArray, this is an empty dictionary. |
| 75 | + na_value = np.nan |
| 76 | + _metadata = () # type: Tuple[str, ...] |
205 | 77 |
|
206 | 78 | def __str__(self): |
207 | 79 | return self.name |
208 | 80 |
|
| 81 | + def __eq__(self, other): |
| 82 | + """Check whether 'other' is equal to self. |
| 83 | +
|
| 84 | + By default, 'other' is considered equal if either |
| 85 | +
|
| 86 | + * it's a string matching 'self.name'. |
| 87 | + * it's an instance of this type and all of the |
| 88 | + attributes in ``self._metadata`` are equal between |
| 89 | + `self` and `other`. |
| 90 | +
|
| 91 | + Parameters |
| 92 | + ---------- |
| 93 | + other : Any |
| 94 | +
|
| 95 | + Returns |
| 96 | + ------- |
| 97 | + bool |
| 98 | + """ |
| 99 | + if isinstance(other, str): |
| 100 | + try: |
| 101 | + other = self.construct_from_string(other) |
| 102 | + except TypeError: |
| 103 | + return False |
| 104 | + if isinstance(other, type(self)): |
| 105 | + return all( |
| 106 | + getattr(self, attr) == getattr(other, attr) |
| 107 | + for attr in self._metadata |
| 108 | + ) |
| 109 | + return False |
| 110 | + |
| 111 | + def __hash__(self): |
| 112 | + return hash(tuple(getattr(self, attr) for attr in self._metadata)) |
| 113 | + |
| 114 | + def __ne__(self, other): |
| 115 | + return not self.__eq__(other) |
| 116 | + |
209 | 117 | @property |
210 | 118 | def type(self) -> Type: |
211 | 119 | """ |
@@ -243,6 +151,15 @@ def name(self) -> str: |
243 | 151 | """ |
244 | 152 | raise AbstractMethodError(self) |
245 | 153 |
|
| 154 | + @property |
| 155 | + def names(self) -> Optional[List[str]]: |
| 156 | + """Ordered list of field names, or None if there are no fields. |
| 157 | +
|
| 158 | + This is for compatibility with NumPy arrays, and may be removed in the |
| 159 | + future. |
| 160 | + """ |
| 161 | + return None |
| 162 | + |
246 | 163 | @classmethod |
247 | 164 | def construct_array_type(cls): |
248 | 165 | """ |
@@ -286,3 +203,73 @@ def construct_from_string(cls, string): |
286 | 203 | ... "'{}'".format(cls, string)) |
287 | 204 | """ |
288 | 205 | raise AbstractMethodError(cls) |
| 206 | + |
| 207 | + @classmethod |
| 208 | + def is_dtype(cls, dtype): |
| 209 | + """Check if we match 'dtype'. |
| 210 | +
|
| 211 | + Parameters |
| 212 | + ---------- |
| 213 | + dtype : object |
| 214 | + The object to check. |
| 215 | +
|
| 216 | + Returns |
| 217 | + ------- |
| 218 | + is_dtype : bool |
| 219 | +
|
| 220 | + Notes |
| 221 | + ----- |
| 222 | + The default implementation is True if |
| 223 | +
|
| 224 | + 1. ``cls.construct_from_string(dtype)`` returns a value other |
| 225 | + than ``None`` without raising ``TypeError``. |
| 226 | + 2. ``dtype`` is an object and is an instance of ``cls`` |
| 227 | + 3. ``dtype`` has a ``dtype`` attribute, and any of the above |
| 228 | + conditions is true for ``dtype.dtype``. |
| 229 | + """ |
| 230 | + dtype = getattr(dtype, 'dtype', dtype) |
| 231 | + |
| 232 | + if isinstance(dtype, (ABCSeries, ABCIndexClass, |
| 233 | + ABCDataFrame, np.dtype)): |
| 234 | + # https://github.com/pandas-dev/pandas/issues/22960 |
| 235 | + # avoid passing data to `construct_from_string`. This could |
| 236 | + # cause a FutureWarning from numpy about failing elementwise |
| 237 | + # comparison from, e.g., comparing DataFrame == 'category'. |
| 238 | + return False |
| 239 | + elif dtype is None: |
| 240 | + return False |
| 241 | + elif isinstance(dtype, cls): |
| 242 | + return True |
| 243 | + try: |
| 244 | + return cls.construct_from_string(dtype) is not None |
| 245 | + except TypeError: |
| 246 | + return False |
| 247 | + |
| 248 | + @property |
| 249 | + def _is_numeric(self) -> bool: |
| 250 | + """ |
| 251 | + Whether columns with this dtype should be considered numeric. |
| 252 | +
|
| 253 | + By default ExtensionDtypes are assumed to be non-numeric. |
| 254 | + They'll be excluded from operations that exclude non-numeric |
| 255 | + columns, like (groupby) reductions, plotting, etc. |
| 256 | + """ |
| 257 | + return False |
| 258 | + |
| 259 | + @property |
| 260 | + def _is_boolean(self) -> bool: |
| 261 | + """ |
| 262 | + Whether this dtype should be considered boolean. |
| 263 | +
|
| 264 | + By default, ExtensionDtypes are assumed to be non-boolean. |
| 265 | + Setting this to True will affect the behavior of several places, |
| 266 | + e.g. |
| 267 | +
|
| 268 | + * is_bool |
| 269 | + * boolean indexing |
| 270 | +
|
| 271 | + Returns |
| 272 | + ------- |
| 273 | + bool |
| 274 | + """ |
| 275 | + return False |
0 commit comments