pandas-dev
diff --git a/‎doc/source/whatsnew/v2.0.0.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v2.0.0.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/core/algorithms.py‎
Lines changed: 3 additions & 0 deletions b/‎pandas/core/algorithms.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pandas/core/arrays/arrow/array.py‎
Lines changed: 2 additions & 3 deletions b/‎pandas/core/arrays/arrow/array.py‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎pandas/tests/arrays/string_/test_string.py‎
Lines changed: 11 additions & 3 deletions b/‎pandas/tests/arrays/string_/test_string.py‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎pandas/tests/base/test_value_counts.py‎
Lines changed: 10 additions & 4 deletions b/‎pandas/tests/base/test_value_counts.py‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎pandas/tests/extension/test_arrow.py‎
Lines changed: 19 additions & 3 deletions b/‎pandas/tests/extension/test_arrow.py‎
Lines changed: 19 additions & 3 deletions
diff --git a/‎pandas/tests/extension/test_string.py‎
Lines changed: 11 additions & 62 deletions b/‎pandas/tests/extension/test_string.py‎
Lines changed: 11 additions & 62 deletions
@@ -818,6 +818,7 @@ Other API changes
 - :class:`DataFrame` and :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
 - Division by zero with :class:`ArrowDtype` dtypes returns ``-inf``, ``nan``, or ``inf`` depending on the numerator, instead of raising (:issue:`51541`)
 - Added :func:`pandas.api.types.is_any_real_numeric_dtype` to check for real numeric dtypes (:issue:`51152`)
+- :meth:`~arrays.ArrowExtensionArray.value_counts` now returns data with :class:`ArrowDtype` with ``pyarrow.int64`` type instead of ``"Int64"`` type (:issue:`51462`)
 
 .. note::
 
 
@@ -873,6 +873,9 @@ def value_counts(
  result.name = name
  result.index.name = index_name
  counts = result._values
+ if not isinstance(counts, np.ndarray):
+ # e.g. ArrowExtensionArray
+ counts = np.asarray(counts)
 
  elif isinstance(values, ABCMultiIndex):
  # GH49558
 
@@ -983,12 +983,11 @@ def value_counts(self, dropna: bool = True) -> Series:
  if pa.types.is_duration(pa_type):
  values = values.cast(pa_type)
 
- # No missing values so we can adhere to the interface and return a numpy array.
- counts = np.array(counts)
+ counts = ArrowExtensionArray(counts)
 
  index = Index(type(self)(values))
 
- return Series(counts, index=index, name="count").astype("Int64")
+ return Series(counts, index=index, name="count")
 
  @classmethod
  def _concat_same_type(
 
@@ -453,20 +453,28 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2):
 
 
 def test_value_counts_na(dtype):
+ if getattr(dtype, "storage", "") == "pyarrow":
+ exp_dtype = "int64[pyarrow]"
+ else:
+ exp_dtype = "Int64"
  arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
  result = arr.value_counts(dropna=False)
- expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype="Int64", name="count")
+ expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype=exp_dtype, name="count")
  tm.assert_series_equal(result, expected)
 
  result = arr.value_counts(dropna=True)
- expected = pd.Series([2, 1], index=arr[:2], dtype="Int64", name="count")
+ expected = pd.Series([2, 1], index=arr[:2], dtype=exp_dtype, name="count")
  tm.assert_series_equal(result, expected)
 
 
 def test_value_counts_with_normalize(dtype):
+ if getattr(dtype, "storage", "") == "pyarrow":
+ exp_dtype = "double[pyarrow]"
+ else:
+ exp_dtype = "Float64"
  ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
  result = ser.value_counts(normalize=True)
- expected = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion") / 3
+ expected = pd.Series([2, 1], index=ser[:2], dtype=exp_dtype, name="proportion") / 3
  tm.assert_series_equal(result, expected)
 
 
 
@@ -39,8 +39,11 @@ def test_value_counts(index_or_series_obj):
  expected.index.name = obj.name
 
  if not isinstance(result.dtype, np.dtype):
- # i.e IntegerDtype
- expected = expected.astype("Int64")
+ if getattr(obj.dtype, "storage", "") == "pyarrow":
+ expected = expected.astype("int64[pyarrow]")
+ else:
+ # i.e IntegerDtype
+ expected = expected.astype("Int64")
 
  # TODO(GH#32514): Order of entries with the same count is inconsistent
  # on CI (gh-32449)
@@ -90,8 +93,11 @@ def test_value_counts_null(null_obj, index_or_series_obj):
  result = result.sort_index()
 
  if not isinstance(result.dtype, np.dtype):
- # i.e IntegerDtype
- expected = expected.astype("Int64")
+ if getattr(obj.dtype, "storage", "") == "pyarrow":
+ expected = expected.astype("int64[pyarrow]")
+ else:
+ # i.e IntegerDtype
+ expected = expected.astype("Int64")
  tm.assert_series_equal(result, expected)
 
  expected[null_obj] = 3
 
@@ -785,9 +785,25 @@ def test_diff(self, data, periods, request):
  )
  super().test_diff(data, periods)
 
- @pytest.mark.parametrize("dropna", [True, False])
- def test_value_counts(self, all_data, dropna, request):
- super().test_value_counts(all_data, dropna)
+ def test_value_counts_returns_pyarrow_int64(self, data):
+ # GH 51462
+ data = data[:10]
+ result = data.value_counts()
+ assert result.dtype == ArrowDtype(pa.int64())
+
+ def test_value_counts_with_normalize(self, data, request):
+ data = data[:10].unique()
+ values = np.array(data[~data.isna()])
+ ser = pd.Series(data, dtype=data.dtype)
+
+ result = ser.value_counts(normalize=True).sort_index()
+
+ expected = pd.Series(
+ [1 / len(values)] * len(values), index=result.index, name="proportion"
+ )
+ expected = expected.astype("double[pyarrow]")
+
+ self.assert_series_equal(result, expected)
 
  def test_argmin_argmax(
  self, data_for_sorting, data_missing_for_sorting, na_value, request
 
@@ -18,7 +18,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
 from pandas.errors import PerformanceWarning
 
 import pandas as pd
@@ -196,70 +195,20 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
 
 
 class TestMethods(base.BaseMethodsTests):
- def test_argsort(self, data_for_sorting):
- with tm.maybe_produces_warning(
- PerformanceWarning,
- pa_version_under7p0
- and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow",
- check_stacklevel=False,
- ):
- super().test_argsort(data_for_sorting)
+ def test_value_counts_with_normalize(self, data):
+ data = data[:10].unique()
+ values = np.array(data[~data.isna()])
+ ser = pd.Series(data, dtype=data.dtype)
 
- def test_argsort_missing(self, data_missing_for_sorting):
- with tm.maybe_produces_warning(
- PerformanceWarning,
- pa_version_under7p0
- and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
- check_stacklevel=False,
- ):
- super().test_argsort_missing(data_missing_for_sorting)
-
- def test_argmin_argmax(
- self, data_for_sorting, data_missing_for_sorting, na_value, request
- ):
- super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value)
-
- @pytest.mark.parametrize(
- "op_name, skipna, expected",
- [
- ("idxmax", True, 0),
- ("idxmin", True, 2),
- ("argmax", True, 0),
- ("argmin", True, 2),
- ("idxmax", False, np.nan),
- ("idxmin", False, np.nan),
- ("argmax", False, -1),
- ("argmin", False, -1),
- ],
- )
- def test_argreduce_series(
- self, data_missing_for_sorting, op_name, skipna, expected, request
- ):
- super().test_argreduce_series(
- data_missing_for_sorting, op_name, skipna, expected
- )
+ result = ser.value_counts(normalize=True).sort_index()
 
- @pytest.mark.parametrize("dropna", [True, False])
- def test_value_counts(self, all_data, dropna, request):
- all_data = all_data[:10]
- if dropna:
- other = all_data[~all_data.isna()]
+  expected = pd.Series(
+  [1 / len(values)] * len(values), index=result.index, name="proportion"
+ )
+ if getattr(data.dtype, "storage", "") == "pyarrow":
+ expected = expected.astype("double[pyarrow]")
  else:
- other = all_data
- with tm.maybe_produces_warning(
- PerformanceWarning,
- pa_version_under7p0
- and getattr(all_data.dtype, "storage", "") == "pyarrow"
- and not (dropna and "data_missing" in request.node.nodeid),
- ):
- result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
- with tm.maybe_produces_warning(
- PerformanceWarning,
- pa_version_under7p0
- and getattr(other.dtype, "storage", "") == "pyarrow"
- and not (dropna and "data_missing" in request.node.nodeid),
- ):
- expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
+ expected = expected.astype("Float64")
 
  self.assert_series_equal(result, expected)