pandas-dev
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
Lines changed: 6 additions & 183 deletions
@@ -15,24 +15,19 @@
  is_extension_array_dtype,
  is_float,
  is_float_dtype,
- is_integer,
  is_integer_dtype,
  is_list_like,
  is_numeric_dtype,
- is_object_dtype,
  is_scalar,
- is_string_dtype,
  pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import nanops, ops
-from pandas.core.algorithms import take
-from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
-import pandas.core.common as com
-from pandas.core.indexers import check_bool_array_indexer
+
+from .masked import BaseMaskedArray
 
 if TYPE_CHECKING:
  from pandas._typing import Scalar
@@ -199,7 +194,7 @@ def coerce_to_array(values, mask=None, copy: bool = False):
  return values, mask
 
 
-class BooleanArray(ExtensionArray, ExtensionOpsMixin):
+class BooleanArray(BaseMaskedArray):
  """
  Array of boolean (True/False) data with missing values.
 
@@ -253,6 +248,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
  Length: 3, dtype: boolean
  """
 
+ # The value used to fill '_data' to avoid upcasting
+ _internal_fill_value = False
+
  def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
  if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
  raise TypeError(
@@ -297,127 +295,6 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
  def _from_factorized(cls, values, original: "BooleanArray"):
  return cls._from_sequence(values, dtype=original.dtype)
 
- def _formatter(self, boxed=False):
- return str
-
- @property
- def _hasna(self) -> bool:
- # Note: this is expensive right now! The hope is that we can
- # make this faster by having an optional mask, but not have to change
- # source code using it..
- return self._mask.any()
-
- def __getitem__(self, item):
- if is_integer(item):
- if self._mask[item]:
- return self.dtype.na_value
- return self._data[item]
-
- elif com.is_bool_indexer(item):
- item = check_bool_array_indexer(self, item)
-
- return type(self)(self._data[item], self._mask[item])
-
- def to_numpy(
- self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default,
- ):
- """
- Convert to a NumPy Array.
-
- By default converts to an object-dtype NumPy array. Specify the `dtype` and
- `na_value` keywords to customize the conversion.
-
- Parameters
- ----------
- dtype : dtype, default object
- The numpy dtype to convert to.
- copy : bool, default False
- Whether to ensure that the returned value is a not a view on
- the array. Note that ``copy=False`` does not *ensure* that
- ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
- a copy is made, even if not strictly necessary. This is typically
- only possible when no missing values are present and `dtype`
- is a boolean dtype.
- na_value : scalar, optional
- Scalar missing value indicator to use in numpy array. Defaults
- to the native missing value indicator of this array (pd.NA).
-
- Returns
- -------
- numpy.ndarray
-
- Examples
- --------
- An object-dtype is the default result
-
- >>> a = pd.array([True, False], dtype="boolean")
- >>> a.to_numpy()
- array([True, False], dtype=object)
-
- When no missing values are present, a boolean dtype can be used.
-
- >>> a.to_numpy(dtype="bool")
- array([ True, False])
-
- However, requesting a bool dtype will raise a ValueError if
- missing values are present and the default missing value :attr:`NA`
- is used.
-
- >>> a = pd.array([True, False, pd.NA], dtype="boolean")
- >>> a
- <BooleanArray>
- [True, False, NA]
- Length: 3, dtype: boolean
-
- >>> a.to_numpy(dtype="bool")
- Traceback (most recent call last):
- ...
- ValueError: cannot convert to bool numpy array in presence of missing values
-
- Specify a valid `na_value` instead
-
- >>> a.to_numpy(dtype="bool", na_value=False)
- array([ True, False, False])
- """
- if na_value is lib.no_default:
- na_value = libmissing.NA
- if dtype is None:
- dtype = object
- if self._hasna:
- if (
- not (is_object_dtype(dtype) or is_string_dtype(dtype))
- and na_value is libmissing.NA
- ):
- raise ValueError(
- f"cannot convert to '{dtype}'-dtype NumPy array "
- "with missing values. Specify an appropriate 'na_value' "
- "for this dtype."
- )
- # don't pass copy to astype -> always need a copy since we are mutating
- data = self._data.astype(dtype)
- data[self._mask] = na_value
- else:
- data = self._data.astype(dtype, copy=copy)
- return data
-
- __array_priority__ = 1000 # higher than ndarray so ops dispatch to us
-
- def __array__(self, dtype=None):
- """
- the array interface, return my values
- We return an object array here to preserve our scalar values
- """
- # by default (no dtype specified), return an object array
- return self.to_numpy(dtype=dtype)
-
- def __arrow_array__(self, type=None):
- """
- Convert myself into a pyarrow Array.
- """
- import pyarrow as pa
-
- return pa.array(self._data, mask=self._mask, type=type)
-
  _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)
 
  def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
@@ -465,40 +342,6 @@ def reconstruct(x):
  else:
  return reconstruct(result)
 
- def __iter__(self):
- for i in range(len(self)):
- if self._mask[i]:
- yield self.dtype.na_value
- else:
- yield self._data[i]
-
- def take(self, indexer, allow_fill=False, fill_value=None):
- # we always fill with False internally
- # to avoid upcasting
- data_fill_value = False if isna(fill_value) else fill_value
- result = take(
- self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill
- )
-
- mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill)
-
- # if we are filling
- # we only fill where the indexer is null
- # not existing missing values
- # TODO(jreback) what if we have a non-na float as a fill value?
- if allow_fill and notna(fill_value):
- fill_mask = np.asarray(indexer) == -1
- result[fill_mask] = fill_value
- mask = mask ^ fill_mask
-
- return type(self)(result, mask, copy=False)
-
- def copy(self):
- data, mask = self._data, self._mask
- data = data.copy()
- mask = mask.copy()
- return type(self)(data, mask, copy=False)
-
  def __setitem__(self, key, value):
  _is_scalar = is_scalar(value)
  if _is_scalar:
@@ -512,26 +355,6 @@ def __setitem__(self, key, value):
  self._data[key] = value
  self._mask[key] = mask
 
- def __len__(self):
- return len(self._data)
-
- @property
- def nbytes(self):
- return self._data.nbytes + self._mask.nbytes
-
- def isna(self):
- return self._mask
-
- @property
- def _na_value(self):
- return self._dtype.na_value
-
- @classmethod
- def _concat_same_type(cls, to_concat):
- data = np.concatenate([x._data for x in to_concat])
- mask = np.concatenate([x._mask for x in to_concat])
- return cls(data, mask)
-
  def astype(self, dtype, copy=True):
  """
  Cast to a NumPy array or ExtensionArray with 'dtype'.