Skip to content
16 changes: 8 additions & 8 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide<install.dependencies
``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql,
sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`).

.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_nullable_backend:
.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_dtype_backend:

Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

Expand All @@ -41,7 +41,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
* :func:`read_sql_query`
* :func:`read_sql_table`

Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
to select the nullable dtypes implementation.

* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
Expand All @@ -50,12 +50,12 @@ to select the nullable dtypes implementation.
* :func:`read_orc`


And the following methods will also utilize the ``mode.nullable_backend`` option.
And the following methods will also utilize the ``mode.dtype_backend`` option.

* :meth:`DataFrame.convert_dtypes`
* :meth:`Series.convert_dtypes`

By default, ``mode.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
By default, ``mode.dtype_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`, :issue:`49997`).

.. ipython:: python
Expand All @@ -65,12 +65,12 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
1,2.5,True,a,,,,,
3,4.5,False,b,6,7.5,True,a,
""")
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
df = pd.read_csv(data, use_nullable_dtypes=True)
df.dtypes

data.seek(0)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
df_pyarrow.dtypes

Expand Down
11 changes: 6 additions & 5 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,11 @@ def use_inf_as_na_cb(key) -> None:
The default storage for StringDtype.
"""

nullable_backend_doc = """
dtype_backend_doc = """
: string
The nullable dtype implementation to return.
Available options: 'pandas', 'pyarrow', the default is 'pandas'.
The nullable dtype implementation to return. Only applicable to certain
operations where documented. Available options: 'pandas', 'pyarrow',
the default is 'pandas'.
"""

with cf.config_prefix("mode"):
Expand All @@ -553,9 +554,9 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory(["python", "pyarrow"]),
)
cf.register_option(
"nullable_backend",
"dtype_backend",
"pandas",
nullable_backend_doc,
dtype_backend_doc,
validator=is_one_of_factory(["pandas", "pyarrow"]),
)

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ def convert_dtypes(
convert_boolean: bool = True,
convert_floating: bool = True,
infer_objects: bool = False,
nullable_backend: Literal["pandas", "pyarrow"] = "pandas",
dtype_backend: Literal["pandas", "pyarrow"] = "pandas",
) -> DtypeObj:
"""
Convert objects to best possible type, and optionally,
Expand All @@ -983,7 +983,7 @@ def convert_dtypes(
infer_objects : bool, defaults False
Whether to also infer objects to float/int if possible. Is only hit if the
object array contains pd.NA.
nullable_backend : str, default "pandas"
dtype_backend : str, default "pandas"
Nullable dtype implementation to use.

* "pandas" returns numpy-backed nullable types
Expand Down Expand Up @@ -1076,7 +1076,7 @@ def convert_dtypes(
else:
inferred_dtype = input_array.dtype

if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
from pandas.core.arrays.arrow.array import to_pyarrow_type
from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.string_ import StringDtype
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6435,9 +6435,9 @@ def convert_dtypes(

.. versionadded:: 2.0
The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

Examples
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5410,15 +5410,15 @@ def _convert_dtypes(
input_series = input_series.copy()

if convert_string or convert_integer or convert_boolean or convert_floating:
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
inferred_dtype = convert_dtypes(
input_series._values,
convert_string,
convert_integer,
convert_boolean,
convert_floating,
infer_objects,
nullable_backend,
dtype_backend,
)
result = input_series.astype(inferred_dtype)
else:
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,16 @@ def read_orc(
for the resulting DataFrame.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0

.. note::

Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported.
Currently only ``mode.dtype_backend`` set to ``"pyarrow"`` is supported.

**kwargs
Any additional kwargs are passed to pyarrow.
Expand All @@ -90,10 +90,10 @@ def read_orc(
orc_file = orc.ORCFile(handles.handle)
pa_table = orc_file.read(columns=columns, **kwargs)
if use_nullable_dtypes:
nullable_backend = get_option("mode.nullable_backend")
if nullable_backend != "pyarrow":
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend != "pyarrow":
raise NotImplementedError(
f"mode.nullable_backend set to {nullable_backend} is not implemented."
f"mode.dtype_backend set to {dtype_backend} is not implemented."
)
df = DataFrame(
{
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,12 @@ def read(
) -> DataFrame:
kwargs["use_pandas_metadata"] = True

nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
to_pandas_kwargs = {}
if use_nullable_dtypes:
import pandas as pd

if nullable_backend == "pandas":
if dtype_backend == "pandas":
mapping = {
self.api.int8(): pd.Int8Dtype(),
self.api.int16(): pd.Int16Dtype(),
Expand Down Expand Up @@ -257,9 +257,9 @@ def read(
pa_table = self.api.parquet.read_table(
path_or_handle, columns=columns, **kwargs
)
if nullable_backend == "pandas":
if dtype_backend == "pandas":
result = pa_table.to_pandas(**to_pandas_kwargs)
elif nullable_backend == "pyarrow":
elif dtype_backend == "pyarrow":
result = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
Expand Down Expand Up @@ -509,9 +509,9 @@ def read_parquet(
.. versionadded:: 1.2.0

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0.0
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def read(self) -> DataFrame:
)
if (
self.kwds["use_nullable_dtypes"]
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
):
frame = DataFrame(
{
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def _infer_types(
use_nullable_dtypes: Literal[True] | Literal[False] = (
self.use_nullable_dtypes and no_dtype_specified
)
nullable_backend = get_option("mode.nullable_backend")
dtype_backend = get_option("mode.dtype_backend")
result: ArrayLike

if try_num_bool and is_object_dtype(values.dtype):
Expand Down Expand Up @@ -770,7 +770,7 @@ def _infer_types(
if inferred_type != "datetime":
result = StringDtype().construct_array_type()._from_sequence(values)

if use_nullable_dtypes and nullable_backend == "pyarrow":
if use_nullable_dtypes and dtype_backend == "pyarrow":
pa = import_optional_dependency("pyarrow")
if isinstance(result, np.ndarray):
result = ArrowExtensionArray(pa.array(result, from_pandas=True))
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,9 @@
implementation, even if no nulls are present.

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.nullable_backend", "pandas")`` to use
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0
Expand Down Expand Up @@ -561,12 +561,12 @@ def _read(
)
elif (
kwds.get("use_nullable_dtypes", False)
and get_option("mode.nullable_backend") == "pyarrow"
and get_option("mode.dtype_backend") == "pyarrow"
and kwds.get("engine") == "c"
):
raise NotImplementedError(
f"use_nullable_dtypes=True and engine={kwds['engine']} with "
"mode.nullable_backend set to 'pyarrow' is not implemented."
"mode.dtype_backend set to 'pyarrow' is not implemented."
)
else:
chunksize = validate_integer("chunksize", chunksize, 1)
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_convert_dtypes_retain_column_names(self):
tm.assert_index_equal(result.columns, df.columns)
assert result.columns.name == "cols"

def test_pyarrow_nullable_backend(self):
def test_pyarrow_dtype_backend(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
Expand All @@ -56,7 +56,7 @@ def test_pyarrow_nullable_backend(self):
"f": pd.Series(pd.timedelta_range("1D", periods=3)),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
Expand Down Expand Up @@ -90,14 +90,14 @@ def test_pyarrow_nullable_backend(self):
)
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_already_pyarrow(self):
def test_pyarrow_dtype_backend_already_pyarrow(self):
pytest.importorskip("pyarrow")
expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]")
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = expected.convert_dtypes()
tm.assert_frame_equal(result, expected)

def test_pyarrow_nullable_backend_from_pandas_nullable(self):
def test_pyarrow_dtype_backend_from_pandas_nullable(self):
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
Expand All @@ -107,7 +107,7 @@ def test_pyarrow_nullable_backend_from_pandas_nullable(self):
"d": pd.Series([None, 100.5, 200], dtype="Float64"),
}
)
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = df.convert_dtypes()
expected = pd.DataFrame(
{
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,10 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
tm.assert_frame_equal(actual, expected)

@pytest.mark.parametrize(
"nullable_backend",
"dtype_backend",
["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
)
def test_use_nullable_dtypes(self, read_ext, nullable_backend):
def test_use_nullable_dtypes(self, read_ext, dtype_backend):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")
Expand All @@ -561,11 +561,11 @@ def test_use_nullable_dtypes(self, read_ext, nullable_backend):
)
with tm.ensure_clean(read_ext) as file_path:
df.to_excel(file_path, "test", index=False)
with pd.option_context("mode.nullable_backend", nullable_backend):
with pd.option_context("mode.dtype_backend", dtype_backend):
result = pd.read_excel(
file_path, sheet_name="test", use_nullable_dtypes=True
)
if nullable_backend == "pyarrow":
if dtype_backend == "pyarrow":
import pyarrow as pa

from pandas.arrays import ArrowExtensionArray
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request):
1,2.5,True,a,,,,,12-31-2019,
3,4.5,False,b,6,7.5,True,a,12-31-2019,
"""
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
if engine == "c":
request.node.add_marker(
pytest.mark.xfail(
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/test_orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,9 @@ def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath):
input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc")
with pytest.raises(
NotImplementedError,
match="mode.nullable_backend set to pandas is not implemented.",
match="mode.dtype_backend set to pandas is not implemented.",
):
with pd.option_context("mode.nullable_backend", "pandas"):
with pd.option_context("mode.dtype_backend", "pandas"):
read_orc(input_file, use_nullable_dtypes=True)


Expand All @@ -337,7 +337,7 @@ def test_orc_use_nullable_dtypes_pyarrow_backend():
}
)
bytes_data = df.copy().to_orc()
with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True)
expected = pd.DataFrame(
{
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,7 +1037,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
)

with pd.option_context("mode.nullable_backend", "pyarrow"):
with pd.option_context("mode.dtype_backend", "pyarrow"):
check_round_trip(
df,
engine=pa,
Expand Down