Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
5e1cb8c
Add html repr for groupby dataframe and series
JulianWgs Jun 22, 2020
9c3df8a
Sort imports with isort in groupby.py
JulianWgs Jun 22, 2020
4a3911a
Improve variable naming
JulianWgs Jul 25, 2020
46f5353
Add display.max_groups to config
JulianWgs Jul 25, 2020
c840c29
Add group display truncation for too many groups
JulianWgs Jul 25, 2020
139bdc6
Implement faster and more scalable list variant
JulianWgs Jul 25, 2020
1020be9
Black config_init
JulianWgs Jul 25, 2020
2e4a6ee
Fix bug which displayed too few rows
JulianWgs Jul 25, 2020
ea2f151
Add test for groupby representation
JulianWgs Jul 26, 2020
2443b80
Delete trailing whitespace in comment
JulianWgs Jul 26, 2020
913afb0
Add test cases for truncated rows and groups
JulianWgs Jul 26, 2020
7efc505
Merge remote-tracking branch 'upstream/master'
JulianWgs Aug 24, 2020
d85fc63
Skip test if lxml is not installed
JulianWgs Sep 3, 2020
778d90d
Move html repr function to io/formats/format.py
JulianWgs Sep 5, 2020
9736007
Add doc string and return type annotation
JulianWgs Sep 5, 2020
7f1937c
Add type annotations for input arg
JulianWgs Sep 5, 2020
388f35d
Fix linting errors
JulianWgs Sep 5, 2020
228e659
Move import to the correct location
JulianWgs Sep 5, 2020
dee1220
Remove pandas type annotations
JulianWgs Oct 3, 2020
57b8bf3
Merge remote-tracking branch 'upstream/master'
JulianWgs Oct 26, 2020
2c5c394
Remove inconsistent use of pd namespace in tests
JulianWgs Oct 28, 2020
669c047
Fix typo and capitalize pandas objs correctly
JulianWgs Oct 30, 2020
8a75299
Change docstring to comment in groupby repr test
JulianWgs Oct 30, 2020
b36177d
Add additional explanation in groupby_repr test
JulianWgs Oct 30, 2020
edff21d
Test more rows in groupby repr when truncated
JulianWgs Nov 11, 2020
580d09b
Test more groups in groupby repr when truncated
JulianWgs Nov 11, 2020
0c948e1
Refactor groups repr html
JulianWgs Jan 3, 2021
e41ff00
Merge remote-tracking branch 'upstream/master'
JulianWgs Jan 3, 2021
ae8721d
Add whatsnew entry for group-by HTML representation
JulianWgs Jan 3, 2021
1c92ed8
Fix test case name
JulianWgs Jan 3, 2021
b92d61f
Rename groupby objects
JulianWgs Jan 3, 2021
579998a
Add case for single and tuple groupby key
JulianWgs Jan 3, 2021
8d8b260
Merge remote-tracking branch 'upstream/master'
JulianWgs Jan 3, 2021
5865cfb
Merge branch 'master' into master
JulianWgs Jul 4, 2021
7a11be8
Move whats new to 1.4.0 release
JulianWgs Jul 4, 2021
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move html repr function to io/formats/format.py
- from groupby module - #34926 (comment)
  • Loading branch information
JulianWgs committed Sep 5, 2020
commit 778d90d0ef0179bfa88f236ccdef95843f25e14e
25 changes: 4 additions & 21 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class providing the base-class of operations.
from pandas.core.sorting import get_group_index_sorter
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE

from pandas.io.formats.format import repr_html_groupby

_common_see_also = """
See Also
--------
Expand Down Expand Up @@ -550,27 +552,8 @@ def __repr__(self) -> str:
return object.__repr__(self)

def _repr_html_(self) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to reorg this to use a GroupbyFormatter located in pandas/io/formats/groupby.py (it can do pretty much this but just locate the code there) as this is where we keep all of the formatting code.

could also add a .to_string() method but not sure that's actually worth it (maybe open an issue for that).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review! Do you mean pandas/io/formats/html.py? Should I add a new function there and then just call that function from the above location?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I mean pandas/io/formats/format.py (it's fine to just put it in there; we should split that file up, but that's for later).

max_groups = get_option("display.max_groups")
max_rows = max(
1, get_option("display.max_rows") // min(max_groups, self.ngroups)
)
group_names = list(self.groups.keys())
truncated = max_groups < self.ngroups
if truncated:
n_start = (max_groups + 1) // 2
n_end = max_groups - n_start
group_names = group_names[:n_start] + group_names[-n_end:]
repr_html_list = list()
for group_name in group_names:
group = self.get_group(group_name)
if not hasattr(group, "to_html"):
group = group.to_frame()
repr_html_list.append(
f"<H3>Group Key: {group_name}<H3/>\n{group.to_html(max_rows=max_rows)}"
)
if truncated:
repr_html_list.insert(max_groups // 2, "<H3>...<H3/>")
return "\n".join(repr_html_list)
return repr_html_groupby(self)


def _assure_grouper(self):
"""
Expand Down
24 changes: 24 additions & 0 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1990,3 +1990,27 @@ def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
if any(isinstance(x, str) for x in lines):
lines = [str(x) for x in lines]
buf.write("\n".join(lines))


def repr_html_groupby(group_obj):
    """
    Build the HTML representation of a GroupBy object.

    Every displayed group is rendered as an ``<H3>`` heading holding the
    group key, followed by the HTML table of that group. When there are
    more groups than ``display.max_groups``, only the first and last groups
    are rendered and an ``<H3>...</H3>`` marker is placed between them.

    Parameters
    ----------
    group_obj : GroupBy
        Object exposing ``ngroups``, ``groups`` and ``get_group``.

    Returns
    -------
    str
        The HTML fragments joined by newlines; an empty string when the
        object has no groups.
    """
    # Function-scope import: only the group-key escaping below needs it.
    from html import escape

    n_groups = group_obj.ngroups
    if n_groups == 0:
        # Nothing to render; also avoids dividing by zero further down.
        return ""

    max_groups = get_option("display.max_groups")
    if max_groups is None:
        # Treat None as "no limit", mirroring display.max_rows.
        max_groups = n_groups

    # Spread the display.max_rows budget over the displayed groups, keeping
    # at least one row per group. None means "no limit" and is passed on.
    row_budget = get_option("display.max_rows")
    if row_budget is None:
        max_rows = None
    else:
        n_shown = max(1, min(max_groups, n_groups))
        max_rows = max(1, row_budget // n_shown)

    group_names = list(group_obj.groups)
    truncated = max_groups < n_groups
    n_start = 0
    if truncated:
        n_start = (max_groups + 1) // 2
        n_end = max_groups - n_start
        # ``names[-0:]`` is the whole list, so an empty tail needs a guard.
        tail = group_names[-n_end:] if n_end > 0 else []
        group_names = group_names[:n_start] + tail

    repr_html_list = []
    for group_name in group_names:
        group = group_obj.get_group(group_name)
        if not hasattr(group, "to_html"):
            # Groups without ``to_html`` (e.g. Series) are shown as a frame.
            group = group.to_frame()
        header = f"<H3>Group Key: {escape(str(group_name))}</H3>"
        repr_html_list.append(f"{header}\n{group.to_html(max_rows=max_rows)}")

    if truncated:
        # The marker belongs right after the head groups, before the tail.
        repr_html_list.insert(n_start, "<H3>...</H3>")
    return "\n".join(repr_html_list)