Fixed FutureWarning emitting logic, reverted openpyxl workaround

pandas-dev · jreback · Dec 1, 2020 · Nov 2, 2019 · Aug 23, 2020 · Aug 26, 2020
commit 32333817a8c4ff0ac644bd6f65f3b6f69a46396d
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -484,7 +484,7 @@ Deprecations
 - Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`)
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
-- :func:`read_excel` "xlrd" engine is deprecated. The recommended engine is "openpyxl" for "xlsx" and "xlsm" files, because "xlrd" is no longer maintained (:issue:`28547`).
+- Deprecated the default argument ``engine=None`` of the function :func:`read_excel`, which uses the no-longer-maintained xlrd engine. Not specifying the engine will emit a ``FutureWarning``. In a future version this argument will default to ``"openpyxl"``, which is now the recommended engine for xlsx and xlsm files (:issue:`28547`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -102,10 +102,15 @@
  If io is not a buffer or path, this must be set to identify io.
  Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd".
  Engine compatibility :
- - "xlrd" supports most old/new Excel file formats.
+ - "xlrd" supports most old/new Excel file formats but is no longer maintained.
  - "openpyxl" supports newer Excel file formats.
  - "odf" supports OpenDocument file formats (.odf, .ods, .odt).
  - "pyxlsb" supports Binary Excel files.
+
+ .. deprecated:: 1.2.0
+ The default value ``None`` is deprecated and will be changed to ``"openpyxl"``
+ in a future version. Not specifying an engine will emit a ``FutureWarning``.
+
 converters : dict, default None
  Dict of functions for converting values in certain columns. Keys can
  either be integers or column labels, values are functions that take one
@@ -881,10 +886,15 @@ class ExcelFile:
  Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
  default ``xlrd`` for .xls* files, ``odf`` for .ods files.
  Engine compatibility :
- - ``xlrd`` supports most old/new Excel file formats.
+ - ``xlrd`` supports most old/new Excel file formats but is no longer maintained.
  - ``openpyxl`` supports newer Excel file formats.
  - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
  - ``pyxlsb`` supports Binary Excel files.
+
+ .. deprecated:: 1.2.0
+ The default value ``None`` is deprecated and will be changed to
+ ``"openpyxl"`` in a future version. Not specifying an engine will
+ emit a ``FutureWarning``.
  """
 
  from pandas.io.excel._odfreader import ODFReader
@@ -902,26 +912,21 @@ class ExcelFile:
  def __init__(
  self, path_or_buffer, engine=None, storage_options: StorageOptions = None
  ):
- ext = None
- if not isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
- ext = os.path.splitext(str(path_or_buffer))[-1][1:]
-
  if engine is None:
+ warnings.warn(
+ "The default argument engine=None is deprecated. "
+ "Specify the engine argument to suppress this warning.",
+ FutureWarning,
+ stacklevel=4,
+ )
  engine = "xlrd"
  if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
  if _is_ods_stream(path_or_buffer):
  engine = "odf"
  else:
- if ext == "ods":
+ ext = os.path.splitext(str(path_or_buffer))[-1]
+ if ext == ".ods":
  engine = "odf"
-
- elif engine == "xlrd" and ext in ("xlsx", "xlsm"):
- warnings.warn(
- 'The Excel reader engine "xlrd" is deprecated, use "openpyxl" instead. '
- 'Specify engine="openpyxl" to suppress this warning.',
- FutureWarning,
- stacklevel=2,
- )
  if engine not in self._engines:
  raise ValueError(f"Unknown engine: {engine}")
 

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -1,4 +1,3 @@
-from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List, Optional
 
 import numpy as np
@@ -503,11 +502,7 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
  from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC
 
  if cell.is_date:
- try:
- # workaround for inaccurate timestamp notation in excel
- return datetime.fromtimestamp(round(cell.value.timestamp()))
- except (AttributeError, OSError):
- return cell.value
+ return cell.value
  elif cell.data_type == TYPE_ERROR:
  return np.nan
  elif cell.data_type == TYPE_BOOL:

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -21,10 +21,6 @@
  "xlrd",
  marks=[
  td.skip_if_no("xlrd"),
- pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
- pytest.mark.filterwarnings(
- 'ignore:The Excel reader engine "xlrd" is deprecated,'
- ),
  ],
  ),
  pytest.param(
@@ -984,10 +980,7 @@ def test_read_excel_squeeze(self, read_ext):
  expected = Series([1, 2, 3], name="a")
  tm.assert_series_equal(actual, expected)
 
- def test_deprecated_kwargs(self, engine, read_ext):
- if engine == "xlrd":
- pytest.skip("Use of xlrd engine produces a FutureWarning as well")
-
+ def test_deprecated_kwargs(self, read_ext):
  with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
  pd.read_excel("test1" + read_ext, "Sheet1", 0)
 

diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -1197,9 +1197,6 @@ def test_datetimes(self, path):
 
  tm.assert_series_equal(write_frame["A"], read_frame["A"])
 
- @pytest.mark.filterwarnings(
- 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
- )
  def test_bytes_io(self, engine):
  # see gh-7074
  bio = BytesIO()

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -17,9 +17,6 @@ def skip_ods_and_xlsb_files(read_ext):
  pytest.skip("Not valid for xlrd")
 
 
-@pytest.mark.filterwarnings(
- 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
-)
 def test_read_xlrd_book(read_ext, frame):
  df = frame
 
@@ -39,9 +36,6 @@ def test_read_xlrd_book(read_ext, frame):
 
 
 # TODO: test for openpyxl as well
-@pytest.mark.filterwarnings(
- 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
-)
 def test_excel_table_sheet_by_index(datapath, read_ext):
  path = datapath("io", "data", "excel", f"test1{read_ext}")
  with ExcelFile(path, engine="xlrd") as excel:
@@ -52,18 +46,20 @@ def test_excel_table_sheet_by_index(datapath, read_ext):
 def test_excel_file_warning_with_xlsx_file(datapath):
  # GH 29375
  path = datapath("io", "data", "excel", "test1.xlsx")
+ # DeprecationWarning: "This method will be removed in future versions.
+ # Use 'tree.iter()' or 'list(tree.iter())' instead."
  with tm.assert_produces_warning(
- FutureWarning, check_stacklevel=True, raise_on_extra_warnings=False
- ) as w:
- pd.ExcelFile(path, engine="xlrd")
- assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)
+ FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
+ ):
+ ExcelFile(path, engine=None)
 
 
 def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):
  # GH 29375
  path = datapath("io", "data", "excel", "test1.xlsx")
+ # DeprecationWarning: "This method will be removed in future versions.
+ # Use 'tree.iter()' or 'list(tree.iter())' instead."
  with tm.assert_produces_warning(
- FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
- ) as w:
- pd.read_excel(path, "Sheet1", engine="xlrd")
- assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)
+ FutureWarning, check_stacklevel=True, raise_on_extra_warnings=False
+ ):
+ pd.read_excel(path, "Sheet1", engine=None)