Skip to content
16 changes: 8 additions & 8 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide<install.dependencies
``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql,
sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`).

.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_nullable_backend:
.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_dtype_backend:

Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

Expand All @@ -41,7 +41,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
* :func:`read_sql_query`
* :func:`read_sql_table`

Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
to select the nullable dtypes implementation.

* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
Expand All @@ -50,12 +50,12 @@ to select the nullable dtypes implementation.
* :func:`read_orc`


And the following methods will also utilize the ``mode.nullable_backend`` option.
And the following methods will also utilize the ``mode.dtype_backend`` option.

* :meth:`DataFrame.convert_dtypes`
* :meth:`Series.convert_dtypes`

By default, ``mode.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
By default, ``mode.dtype_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`, :issue:`49997`).

.. ipython:: python
Expand All @@ -65,12 +65,12 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
1,2.5,True,a,,,,,
3,4.5,False,b,6,7.5,True,a,
""")
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
df = pd.read_csv(data, use_nullable_dtypes=True)
df.dtypes

data.seek(0)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
df_pyarrow.dtypes

Expand Down
11 changes: 6 additions & 5 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,11 @@ def use_inf_as_na_cb(key) -> None:
The default storage for StringDtype.
"""

nullable_backend_doc = """
dtype_backend_doc = """
: string
The nullable dtype implementation to return.
Available options: 'pandas', 'pyarrow', the default is 'pandas'.
The nullable dtype implementation to return. Only applicable to certain
operations where documented. Available options: 'pandas', 'pyarrow',
the default is 'pandas'.
"""

with cf.config_prefix("mode"):
Expand All @@ -553,9 +554,9 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory(["python", "pyarrow"]),
)
cf.register_option(
"nullable_backend",
"dtype_backend",
"pandas",
nullable_backend_doc,
dtype_backend_doc,
validator=is_one_of_factory(["pandas", "pyarrow"]),
)

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ def convert_dtypes(
convert_boolean: bool = True,
convert_floating: bool = True,
infer_objects: bool = False,
nullable_backend: Literal["pandas", "pyarrow"] = "pandas",
dtype_backend: Literal["pandas", "pyarrow"] = "pandas",
) -> DtypeObj:
"""
Convert objects to best possible type, and optionally,
Expand All @@ -983,7 +983,7 @@ def convert_dtypes(
infer_objects : bool, defaults False
Whether to also infer objects to float/int if possible. Is only hit if the
object array contains pd.NA.
nullable_backend : str, default "pandas"
dtype_backend : str, default "pandas"
Nullable dtype implementation to use.

* "pandas" returns numpy-backed nullable types
Expand Down Expand Up @@ -1076,7 +1076,7 @@ def convert_dtypes(
else:
inferred_dtype = input_array.dtype

if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
from pandas.core.arrays.arrow.array import to_pyarrow_type
from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.string_ import StringDtype
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6435,9 +6435,9 @@ def convert_dtypes(

.. versionadded:: 2.0
The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

Examples
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5410,15 +5410,15 @@ def _convert_dtypes(
input_series = input_series.copy()

if convert_string or convert_integer or convert_boolean or convert_floating:
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
inferred_dtype = convert_dtypes(
input_series._values,
convert_string,
convert_integer,
convert_boolean,
convert_floating,
infer_objects,
nullable_backend,
dtype_backend,
)
result = input_series.astype(inferred_dtype)
else:
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,16 @@ def read_orc(
for the resulting DataFrame.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0

.. note::

Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported.
Currently only ``mode.dtype_backend`` set to ``"pyarrow"`` is supported.

**kwargs
Any additional kwargs are passed to pyarrow.
Expand All @@ -90,10 +90,10 @@ def read_orc(
orc_file = orc.ORCFile(handles.handle)
pa_table = orc_file.read(columns=columns, **kwargs)
if use_nullable_dtypes:
nullable_backend = get_option("mode.nullable_backend")
if nullable_backend != "pyarrow":
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend != "pyarrow":
raise NotImplementedError(
f"mode.nullable_backend set to {nullable_backend} is not implemented."
f"mode.dtype_backend set to {dtype_backend} is not implemented."
)
df = DataFrame(
{
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,12 @@ def read(
) -> DataFrame:
kwargs["use_pandas_metadata"] = True

nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
to_pandas_kwargs = {}
if use_nullable_dtypes:
import pandas as pd

if nullable_backend == "pandas":
if dtype_backend == "pandas":
mapping = {
self.api.int8(): pd.Int8Dtype(),
self.api.int16(): pd.Int16Dtype(),
Expand Down Expand Up @@ -257,9 +257,9 @@ def read(
pa_table = self.api.parquet.read_table(
path_or_handle, columns=columns, **kwargs
)
if nullable_backend == "pandas":
if dtype_backend == "pandas":
result = pa_table.to_pandas(**to_pandas_kwargs)
elif nullable_backend == "pyarrow":
elif dtype_backend == "pyarrow":
result = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
Expand Down Expand Up @@ -509,9 +509,9 @@ def read_parquet(
.. versionadded:: 1.2.0

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def read(self) -> DataFrame:
)
if (
self.kwds["use_nullable_dtypes"]
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
):
frame = DataFrame(
{
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def _infer_types(
use_nullable_dtypes: Literal[True] | Literal[False] = (
self.use_nullable_dtypes and no_dtype_specified
)
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
result: ArrayLike

if try_num_bool and is_object_dtype(values.dtype):
Expand Down Expand Up @@ -770,7 +770,7 @@ def _infer_types(
if inferred_type != "datetime":
result = StringDtype().construct_array_type()._from_sequence(values)

if use_nullable_dtypes and nullable_backend == "pyarrow":
if use_nullable_dtypes and dtype_backend == "pyarrow":
pa = import_optional_dependency("pyarrow")
if isinstance(result, np.ndarray):
result = ArrowExtensionArray(pa.array(result, from_pandas=True))
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,9 @@
implementation, even if no nulls are present.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0
Expand Down Expand Up @@ -561,12 +561,12 @@ def _read(
)
elif (
kwds.get("use_nullable_dtypes", False)
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
and kwds.get("engine") == "c"
):
raise NotImplementedError(
f"use_nullable_dtypes=True and engine={kwds['engine']} with "
"mode.nullable_backend set to 'pyarrow' is not implemented."
"mode.dtype_backend set to 'pyarrow' is not implemented."
)
else:
chunksize = validate_integer("chunksize", chunksize, 1)
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_convert_dtypes_retain_column_names(self):
tm.assert_index_equal(result.columns, df.columns)
assert result.columns.name == "cols"

def test_pyarrow_nullable_backend(self):
def test_pyarrow_dtype_backend(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
Expand All @@ -56,7 +56,7 @@ def test_pyarrow_nullable_backend(self):
"f": pd.Series(pd.timedelta_range("1D", periods=3)),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
Expand Down Expand Up @@ -90,14 +90,14 @@ def test_pyarrow_nullable_backend(self):
)
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_already_pyarrow(self):
def test_pyarrow_dtype_backend_already_pyarrow(self):
pytest.importorskip("pyarrow")
expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]")
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = expected.convert_dtypes()
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_from_pandas_nullable(self):
def test_pyarrow_dtype_backend_from_pandas_nullable(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
Expand All @@ -107,7 +107,7 @@ def test_pyarrow_nullable_backend_from_pandas_nullable(self):
"d": pd.Series([None, 100.5, 200], dtype="Float64"),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,10 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
tm.assert_frame_equal(actual, expected)

@pytest.mark.parametrize(
"nullable_backend",
"dtype_backend",
["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
)
def test_use_nullable_dtypes(self, read_ext, nullable_backend):
def test_use_nullable_dtypes(self, read_ext, dtype_backend):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")
Expand All @@ -561,11 +561,11 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend):
)
with tm.ensure_clean(read_ext) as file_path:
df.to_excel(file_path, "test", index=False)
with pd.option_context("mode.nullable_backend", nullable_backend):
with pd.option_context("mode.dtype_backend", dtype_backend):
result = pd.read_excel(
file_path, sheet_name="test", use_nullable_dtypes=True
)
if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
import pyarrow as pa

from pandas.arrays import ArrowExtensionArray
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request):
1,2.5,True,a,,,,,12-31-2019,
3,4.5,False,b,6,7.5,True,a,12-31-2019,
"""
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
if engine == "c":
request.node.add_marker(
pytest.mark.xfail(
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/test_orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,9 @@ def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath):
input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc")
with pytest.raises(
NotImplementedError,
match="mode.nullable_backend set to pandas is not implemented.",
match="mode.dtype_backend set to pandas is not implemented.",
):
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
read_orc(input_file, use_nullable_dtypes=True)


Expand All @@ -337,7 +337,7 @@ def test_orc_use_nullable_dtypes_pyarrow_backend():
}
)
bytes_data = df.copy().to_orc()
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True)
expected = pd.DataFrame(
{
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,7 +1037,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
)

with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
check_round_trip(
df,
engine=pa,
Expand Down