Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update algorithms.Factorize.time_factorize
  • Loading branch information
simonjayhawkins committed Feb 15, 2021
commit 42ca9c3dabdc220da8265840c7b54dbfd9749986
14 changes: 13 additions & 1 deletion asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas._libs import lib

import pandas as pd
from pandas.core.arrays.string_arrow import ArrowStringDtype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you do this import in a try/except? (We still need to be able to run the benchmarks with slightly older pandas versions that might not have this import available.)


from .pandas_vb_common import tm

Expand Down Expand Up @@ -43,23 +44,34 @@ class Factorize:
"datetime64[ns, tz]",
"Int64",
"boolean",
"string_arrow",
],
]
param_names = ["unique", "sort", "dtype"]

def setup(self, unique, sort, dtype):
N = 10 ** 5
string_index = tm.makeStringIndex(N)
try:
string_arrow = pd.array(string_index, dtype=ArrowStringDtype())
except ImportError:
string_arrow = None

if dtype == "string_arrow" and not string_arrow:
raise NotImplementedError

data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
"string": string_index,
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
"datetime64[ns, tz]": pd.date_range(
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
),
"Int64": pd.array(np.arange(N), dtype="Int64"),
"boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
"string_arrow": string_arrow,
}[dtype]
if not unique:
data = data.repeat(5)
Expand Down