-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
ENH: DataFrame.plot.scatter argument c now accepts a column of strings, where rows with the same string are colored identically #59239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b91e635 8609ea5 b4440c1 571c0c8 e9511d0 1ca57ed fb0d6e4 4bcdbfc 7972138 45886d9 1713727 62427ad 609fe40 6e86858 5223f2a d97606c 7e5a02a File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -10,6 +10,7 @@ | |
| Iterator, | ||
| Sequence, | ||
| ) | ||
| from random import shuffle | ||
| from typing import ( | ||
| TYPE_CHECKING, | ||
| Any, | ||
| | @@ -1337,10 +1338,12 @@ def _make_plot(self, fig: Figure) -> None: | |
| norm, cmap = self._get_norm_and_cmap(c_values, color_by_categorical) | ||
| cb = self._get_colorbar(c_values, c_is_column) | ||
| | ||
| orig_invalid_colors = not self._are_valid_colors(c_values) | ||
| if orig_invalid_colors: | ||
| unique_color_labels, c_values = self._convert_str_to_colors(c_values) | ||
| cb = False | ||
| # if a list of non color strings is passed in as c, generate a list | ||
| # colored by uniqueness of the strings, such same strings get same color | ||
| create_colors = not self._are_valid_colors(c_values) | ||
| if create_colors: | ||
| color_mapping, c_values = self._uniquely_color_strs(c_values) | ||
| cb = False # no colorbar; opt for legend | ||
| | ||
| if self.legend: | ||
| label = self.label | ||
| | @@ -1372,14 +1375,14 @@ def _make_plot(self, fig: Figure) -> None: | |
| label, # type: ignore[arg-type] | ||
| ) | ||
| | ||
| if orig_invalid_colors: | ||
| for s in unique_color_labels: | ||
| self._append_legend_handles_labels( | ||
| # error: Argument 2 to "_append_legend_handles_labels" of | ||
| # "MPLPlot" has incompatible type "Hashable"; expected "str" | ||
| scatter, | ||
| s, # type: ignore[arg-type] | ||
| ) | ||
| # build legend for labeling custom colors | ||
| if create_colors: | ||
| ax.legend( | ||
| handles=[ | ||
| mpl.patches.Circle((0, 0), facecolor=color, label=string) | ||
| for string, color in color_mapping.items() | ||
| ] | ||
| ) | ||
| | ||
| errors_x = self._get_errorbars(label=x, index=0, yerr=False) | ||
| errors_y = self._get_errorbars(label=y, index=0, xerr=False) | ||
| | @@ -1404,29 +1407,31 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): | |
| c_values = c | ||
| return c_values | ||
| | ||
| def _are_valid_colors(self, c_values): | ||
| # check if c_values contains strings. no need to check numerics as these | ||
| # will be validated for us in .Axes.scatter._parse_scatter_color_args(...) | ||
| if not ( | ||
| np.iterable(c_values) and len(c_values) > 0 and isinstance(c_values[0], str) | ||
| ): | ||
| return True | ||
| | ||
| def _are_valid_colors(self, c_values: np.ndarray | list): | ||
| # check if c_values contains strings and if these strings are valid mpl colors | ||
| # no need to check numerics as these (and mpl colors) will be validated for us | ||
| # in .Axes.scatter._parse_scatter_color_args(...) | ||
| try: | ||
| # similar to above, if this conversion is successful, remaining validation | ||
| # will be done in .Axes.scatter._parse_scatter_color_args(...) | ||
| _ = mpl.colors.to_rgba_array(c_values) | ||
| if len(c_values) and all(isinstance(c, str) for c in c_values): | ||
| mpl.colors.to_rgba_array(c_values) | ||
| | ||
| return True | ||
| | ||
| except (TypeError, ValueError) as _: | ||
| return False | ||
| | ||
| def _convert_str_to_colors(self, c_values): | ||
| def _uniquely_color_strs( | ||
| ||
| self, c_values: np.ndarray | list | ||
| ) -> tuple[dict, np.ndarray]: | ||
| # well, almost uniquely color them (up to 949) | ||
| possible_colors = list(mpl.colors.XKCD_COLORS.values()) # Hex representations | ||
| shuffle(possible_colors) # TODO: find better way of getting colors | ||
| | ||
| unique = np.unique(c_values) | ||
| colors = np.linspace(0, 1, len(unique)) | ||
| colors = [possible_colors[i % len(possible_colors)] for i in range(len(unique))] | ||
| color_mapping = dict(zip(unique, colors)) | ||
| | ||
| return unique, np.array(list(map(color_mapping.get, c_values))) | ||
| return color_mapping, np.array(list(map(color_mapping.get, c_values))) | ||
| | ||
| def _get_norm_and_cmap(self, c_values, color_by_categorical: bool): | ||
| c = self.c | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In what instances is `c_values` a list? Might be misreading, but it would be better if we only worked with a `pd.Series` and could call `.unique` on that, instead of checking every single value in a loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to take a `pd.Series`, not `np.ndarray | list`.