1515 is_extension_array_dtype ,
1616 is_float ,
1717 is_float_dtype ,
18- is_integer ,
1918 is_integer_dtype ,
2019 is_list_like ,
2120 is_numeric_dtype ,
22- is_object_dtype ,
2321 is_scalar ,
24- is_string_dtype ,
2522 pandas_dtype ,
2623)
2724from pandas .core .dtypes .dtypes import register_extension_dtype
2825from pandas .core .dtypes .generic import ABCDataFrame , ABCIndexClass , ABCSeries
2926from pandas .core .dtypes .missing import isna , notna
3027
3128from pandas .core import nanops , ops
32- from pandas .core .algorithms import take
33- from pandas .core .arrays import ExtensionArray , ExtensionOpsMixin
34- import pandas .core .common as com
35- from pandas .core .indexers import check_bool_array_indexer
29+
30+ from .masked import BaseMaskedArray
3631
3732if TYPE_CHECKING :
3833 from pandas ._typing import Scalar
@@ -199,7 +194,7 @@ def coerce_to_array(values, mask=None, copy: bool = False):
199194 return values , mask
200195
201196
202- class BooleanArray (ExtensionArray , ExtensionOpsMixin ):
197+ class BooleanArray (BaseMaskedArray ):
203198 """
204199 Array of boolean (True/False) data with missing values.
205200
@@ -253,6 +248,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
253248 Length: 3, dtype: boolean
254249 """
255250
251+ # The value used to fill '_data' to avoid upcasting
252+ _internal_fill_value = False
253+
256254 def __init__ (self , values : np .ndarray , mask : np .ndarray , copy : bool = False ):
257255 if not (isinstance (values , np .ndarray ) and values .dtype == np .bool_ ):
258256 raise TypeError (
@@ -297,127 +295,6 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
297295 def _from_factorized (cls , values , original : "BooleanArray" ):
298296 return cls ._from_sequence (values , dtype = original .dtype )
299297
300- def _formatter (self , boxed = False ):
301- return str
302-
303- @property
304- def _hasna (self ) -> bool :
305- # Note: this is expensive right now! The hope is that we can
306- # make this faster by having an optional mask, but not have to change
307- # source code using it..
308- return self ._mask .any ()
309-
310- def __getitem__ (self , item ):
311- if is_integer (item ):
312- if self ._mask [item ]:
313- return self .dtype .na_value
314- return self ._data [item ]
315-
316- elif com .is_bool_indexer (item ):
317- item = check_bool_array_indexer (self , item )
318-
319- return type (self )(self ._data [item ], self ._mask [item ])
320-
321- def to_numpy (
322- self , dtype = None , copy = False , na_value : "Scalar" = lib .no_default ,
323- ):
324- """
325- Convert to a NumPy Array.
326-
327- By default converts to an object-dtype NumPy array. Specify the `dtype` and
328- `na_value` keywords to customize the conversion.
329-
330- Parameters
331- ----------
332- dtype : dtype, default object
333- The numpy dtype to convert to.
334- copy : bool, default False
335- Whether to ensure that the returned value is not a view on
336- the array. Note that ``copy=False`` does not *ensure* that
337- ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
338- a copy is made, even if not strictly necessary. This is typically
339- only possible when no missing values are present and `dtype`
340- is a boolean dtype.
341- na_value : scalar, optional
342- Scalar missing value indicator to use in numpy array. Defaults
343- to the native missing value indicator of this array (pd.NA).
344-
345- Returns
346- -------
347- numpy.ndarray
348-
349- Examples
350- --------
351- An object-dtype is the default result
352-
353- >>> a = pd.array([True, False], dtype="boolean")
354- >>> a.to_numpy()
355- array([True, False], dtype=object)
356-
357- When no missing values are present, a boolean dtype can be used.
358-
359- >>> a.to_numpy(dtype="bool")
360- array([ True, False])
361-
362- However, requesting a bool dtype will raise a ValueError if
363- missing values are present and the default missing value :attr:`NA`
364- is used.
365-
366- >>> a = pd.array([True, False, pd.NA], dtype="boolean")
367- >>> a
368- <BooleanArray>
369- [True, False, NA]
370- Length: 3, dtype: boolean
371-
372- >>> a.to_numpy(dtype="bool")
373- Traceback (most recent call last):
374- ...
375- ValueError: cannot convert to 'bool'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.
376-
377- Specify a valid `na_value` instead
378-
379- >>> a.to_numpy(dtype="bool", na_value=False)
380- array([ True, False, False])
381- """
382- if na_value is lib .no_default :
383- na_value = libmissing .NA
384- if dtype is None :
385- dtype = object
386- if self ._hasna :
387- if (
388- not (is_object_dtype (dtype ) or is_string_dtype (dtype ))
389- and na_value is libmissing .NA
390- ):
391- raise ValueError (
392- f"cannot convert to '{ dtype } '-dtype NumPy array "
393- "with missing values. Specify an appropriate 'na_value' "
394- "for this dtype."
395- )
396- # don't pass copy to astype -> always need a copy since we are mutating
397- data = self ._data .astype (dtype )
398- data [self ._mask ] = na_value
399- else :
400- data = self ._data .astype (dtype , copy = copy )
401- return data
402-
403- __array_priority__ = 1000 # higher than ndarray so ops dispatch to us
404-
405- def __array__ (self , dtype = None ):
406- """
407- the array interface, return my values
408- We return an object array here to preserve our scalar values
409- """
410- # by default (no dtype specified), return an object array
411- return self .to_numpy (dtype = dtype )
412-
413- def __arrow_array__ (self , type = None ):
414- """
415- Convert myself into a pyarrow Array.
416- """
417- import pyarrow as pa
418-
419- return pa .array (self ._data , mask = self ._mask , type = type )
420-
421298 _HANDLED_TYPES = (np .ndarray , numbers .Number , bool , np .bool_ )
422299
423300 def __array_ufunc__ (self , ufunc , method , * inputs , ** kwargs ):
@@ -465,40 +342,6 @@ def reconstruct(x):
465342 else :
466343 return reconstruct (result )
467344
468- def __iter__ (self ):
469- for i in range (len (self )):
470- if self ._mask [i ]:
471- yield self .dtype .na_value
472- else :
473- yield self ._data [i ]
474-
475- def take (self , indexer , allow_fill = False , fill_value = None ):
476- # we always fill with False internally
477- # to avoid upcasting
478- data_fill_value = False if isna (fill_value ) else fill_value
479- result = take (
480- self ._data , indexer , fill_value = data_fill_value , allow_fill = allow_fill
481- )
482-
483- mask = take (self ._mask , indexer , fill_value = True , allow_fill = allow_fill )
484-
485- # if we are filling
486- # we only fill where the indexer is null
487- # not existing missing values
488- # TODO(jreback) what if we have a non-na float as a fill value?
489- if allow_fill and notna (fill_value ):
490- fill_mask = np .asarray (indexer ) == - 1
491- result [fill_mask ] = fill_value
492- mask = mask ^ fill_mask
493-
494- return type (self )(result , mask , copy = False )
495-
496- def copy (self ):
497- data , mask = self ._data , self ._mask
498- data = data .copy ()
499- mask = mask .copy ()
500- return type (self )(data , mask , copy = False )
501-
502345 def __setitem__ (self , key , value ):
503346 _is_scalar = is_scalar (value )
504347 if _is_scalar :
@@ -512,26 +355,6 @@ def __setitem__(self, key, value):
512355 self ._data [key ] = value
513356 self ._mask [key ] = mask
514357
515- def __len__ (self ):
516- return len (self ._data )
517-
518- @property
519- def nbytes (self ):
520- return self ._data .nbytes + self ._mask .nbytes
521-
522- def isna (self ):
523- return self ._mask
524-
525- @property
526- def _na_value (self ):
527- return self ._dtype .na_value
528-
529- @classmethod
530- def _concat_same_type (cls , to_concat ):
531- data = np .concatenate ([x ._data for x in to_concat ])
532- mask = np .concatenate ([x ._mask for x in to_concat ])
533- return cls (data , mask )
534-
535358 def astype (self , dtype , copy = True ):
536359 """
537360 Cast to a NumPy array or ExtensionArray with 'dtype'.
0 commit comments