Revised to change implementation to integrate with the original Styler.format method

Change the implementation to integrate with the original `.format()` method via a new `na_rep` parameter. Add a new table-wide default `na_rep` setting, which can be set through the new `.set_na_rep()` method. Also enhance the `.highlight_null()` method so that it accepts a `subset` parameter. Add a few user guide examples and test cases.
pandas-dev · TomAugspurger · Nov 25, 2019 · Oct 20, 2019 · Oct 21, 2019 · Oct 22, 2019
commit 53b0843f8a8efdcc0eb2a1f686611112f77ba275
diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -41,6 +41,7 @@ Style application
  Styler.set_caption
  Styler.set_properties
  Styler.set_uuid
+ Styler.set_na_rep
  Styler.clear
  Styler.pipe
 
@@ -52,7 +53,6 @@ Builtin styles
  Styler.highlight_max
  Styler.highlight_min
  Styler.highlight_null
- Styler.format_null
  Styler.background_gradient
  Styler.bar
 

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -67,7 +67,8 @@
  "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
  "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
  " axis=1)\n",
- "df.iloc[0, 2] = np.nan"
+ "df.iloc[0, 2] = np.nan\n",
+ "df.iloc[3, 3] = np.nan"
  ]
  },
  {
@@ -402,6 +403,38 @@
  "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
  ]
  },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can set the text displayed for missing values with the `na_rep` argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.style.format(\"{:.2%}\", na_rep='-')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "These formatting techniques can be used in combination with styling."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.style.highlight_max(axis=0).format(na_rep='-')"
+ ]
+ },
  {
  "cell_type": "markdown",
  "metadata": {},
@@ -492,22 +525,6 @@
  "df.style.highlight_max(axis=0)"
  ]
  },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You can format the text displayed for missing values by `.format_null`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df.style.highlight_max(axis=0).format_null(na_rep='-')"
- ]
- },
  {
  "cell_type": "markdown",
  "metadata": {},
@@ -675,6 +692,7 @@
  "- precision\n",
  "- captions\n",
  "- table-wide styles\n",
+ "- missing values representation\n",
  "- hiding the index or columns\n",
  "\n",
  "Each of these can be specified in two ways:\n",
@@ -816,6 +834,33 @@
  "We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here."
  ]
  },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Missing values"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can control the default missing values representation for this table through the `set_na_rep` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(df.style\n",
+ " .set_na_rep('BAD')\n",
+ " .highlight_null('red')\n",
+ " .format(na_rep='GOOD', subset=['D'])\n",
+ " .highlight_null('green', subset=['D']))"
+ ]
+ },
  {
  "cell_type": "markdown",
  "metadata": {},

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -110,7 +110,7 @@ Other enhancements
 - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`)
 - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
-- :meth:`Styler.format_null` is now added into the built-in functions to help formatting missing values (:issue:`28358`)
+- Added a ``na_rep`` parameter to the :class:`Styler` constructor and to :meth:`Styler.format`, and added :meth:`Styler.set_na_rep`, to help format missing values (:issue:`28358`)
 
 Build Changes
 ^^^^^^^^^^^^^

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -71,6 +71,9 @@ class Styler:
  The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>``
  where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row
  number and ``<num_col>`` is the column number.
+ na_rep : str or None, default None
+ Representation for missing values.
+ If ``na_rep`` is None, no special formatting is applied
 
  Attributes
  ----------
@@ -126,6 +129,7 @@ def __init__(
  caption=None,
  table_attributes=None,
  cell_ids=True,
+ na_rep=None,
  ):
  self.ctx = defaultdict(list)
  self._todo = []
@@ -151,11 +155,14 @@ def __init__(
  self.hidden_index = False
  self.hidden_columns = []
  self.cell_ids = cell_ids
+ self.na_rep = na_rep
 
  # display_funcs maps (row, col) -> formatting function
 
  def default_display_func(x):
- if is_float(x):
+ if self.na_rep is not None and pd.isna(x):
+ return self.na_rep
+ elif is_float(x):
  return "{:>.{precision}g}".format(x, precision=self.precision)
  else:
  return x
@@ -415,16 +422,20 @@ def format_attr(pair):
  table_attributes=table_attr,
  )
 
- def format(self, formatter, subset=None):
+ def format(self, formatter=None, subset=None, na_rep=None):
  """
  Format the text display value of cells.
 
  Parameters
  ----------
- formatter : str, callable, or dict
+ formatter : str, callable, dict or None
+ If ``formatter`` is None, the default formatter is used
  subset : IndexSlice
  An argument to ``DataFrame.loc`` that restricts which elements
  ``formatter`` is applied to.
+ na_rep : str or None, default None
+ Representation for missing values.
+ If ``na_rep`` is None, no special formatting is applied
 
  Returns
  -------
@@ -450,6 +461,9 @@ def format(self, formatter, subset=None):
  >>> df['c'] = ['a', 'b', 'c', 'd']
  >>> df.style.format({'c': str.upper})
  """
+ if formatter is None:
+ formatter = self._display_funcs.default_factory()
+
  if subset is None:
  row_locs = range(len(self.data))
  col_locs = range(len(self.data.columns))
@@ -466,15 +480,17 @@ def format(self, formatter, subset=None):
  for col, col_formatter in formatter.items():
  # formatter must be callable, so '{}' are converted to lambdas
  col_formatter = _maybe_wrap_formatter(col_formatter)
+ col_formatter = _maybe_wrap_na_formatter(col_formatter, na_rep)
  col_num = self.data.columns.get_indexer_for([col])[0]
 
  for row_num in row_locs:
  self._display_funcs[(row_num, col_num)] = col_formatter
  else:
  # single scalar to format all cells with
+ formatter = _maybe_wrap_formatter(formatter)
+ formatter = _maybe_wrap_na_formatter(formatter, na_rep)
  locs = product(*(row_locs, col_locs))
  for i, j in locs:
- formatter = _maybe_wrap_formatter(formatter)
  self._display_funcs[(i, j)] = formatter
  return self
 
@@ -554,6 +570,7 @@ def _copy(self, deepcopy=False):
  caption=self.caption,
  uuid=self.uuid,
  table_styles=self.table_styles,
+ na_rep=self.na_rep,
  )
  if deepcopy:
  styler.ctx = copy.deepcopy(self.ctx)
@@ -892,6 +909,23 @@ def set_table_styles(self, table_styles):
  self.table_styles = table_styles
  return self
 
+ def set_na_rep(self, na_rep: str) -> "Styler":
+ """
+ Set the missing data representation on a Styler.
+
+ .. versionadded:: 1.0.0
+
+ Parameters
+ ----------
+ na_rep : str
+
+ Returns
+ -------
+ self : Styler
+ """
+ self.na_rep = na_rep
+ return self
+
  def hide_index(self):
  """
  Hide any indices from rendering.
@@ -930,44 +964,27 @@ def hide_columns(self, subset):
  # A collection of "builtin" styles
  # -----------------------------------------------------------------------
 
- def format_null(self, na_rep="-"):
- """
- Format the text displayed for missing values.
-
- .. versionadded:: 1.0.0
-
- Parameters
- ----------
- na_rep : str
-
- Returns
- -------
- self : Styler
- """
- self.format(
- lambda x: na_rep if pd.isna(x) else self._display_funcs.default_factory()(x)
- )
- return self
-
  @staticmethod
  def _highlight_null(v, null_color):
  return (
  "background-color: {color}".format(color=null_color) if pd.isna(v) else ""
  )
 
- def highlight_null(self, null_color="red"):
+ def highlight_null(self, null_color="red", subset=None):
  """
  Shade the background ``null_color`` for missing values.
 
  Parameters
  ----------
  null_color : str
+ subset : IndexSlice, default None
+ A valid slice for ``data`` to limit the style application to.
 
  Returns
  -------
  self : Styler
  """
- self.applymap(self._highlight_null, null_color=null_color)
+ self.applymap(self._highlight_null, null_color=null_color, subset=subset)
  return self
 
  def background_gradient(
@@ -1498,3 +1515,13 @@ def _maybe_wrap_formatter(formatter):
  "instead".format(formatter=formatter)
  )
  raise TypeError(msg)
+
+
+def _maybe_wrap_na_formatter(formatter, na_rep):
+ if na_rep is None:
+ return formatter
+ elif is_string_like(na_rep):
+ return lambda x: na_rep if pd.isna(x) else formatter(x)
+ else:
+ msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep)
+ raise TypeError(msg)
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
@@ -990,10 +990,18 @@ def test_bar_bad_align_raises(self):
  with pytest.raises(ValueError):
  df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"])
 
- def test_format_null(self, na_rep="-"):
+ def test_set_na_rep(self):
  # GH 28358
  df = pd.DataFrame({"A": [0, np.nan]})
- ctx = df.style.format_null()._translate()
+ ctx = df.style.set_na_rep("-")._translate()
+ result = ctx["body"][1][1]["display_value"]
+ expected = "-"
+ assert result == expected
+
+ def test_format_with_na_rep(self):
+ # GH 28358
+ df = pd.DataFrame({"A": [0, np.nan]})
+ ctx = df.style.format(na_rep="-")._translate()
  result = ctx["body"][1][1]["display_value"]
  expected = "-"
  assert result == expected