googleapis
diff --git a/‎bigframes/core/compile/aggregate_compiler.py‎
Lines changed: 41 additions & 19 deletions b/‎bigframes/core/compile/aggregate_compiler.py‎
Lines changed: 41 additions & 19 deletions
diff --git a/‎bigframes/core/reshape/tile.py‎
Lines changed: 15 additions & 7 deletions b/‎bigframes/core/reshape/tile.py‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎bigframes/operations/aggregations.py‎
Lines changed: 16 additions & 6 deletions b/‎bigframes/operations/aggregations.py‎
Lines changed: 16 additions & 6 deletions
diff --git a/‎tests/system/small/test_pandas.py‎
Lines changed: 65 additions & 26 deletions b/‎tests/system/small/test_pandas.py‎
Lines changed: 65 additions & 26 deletions
@@ -364,8 +364,13 @@ def _(
 
  if op.labels is False:
  for this_bin in range(op.bins - 1):
+ if op.right:
+ case_expr = x <= (col_min + (this_bin + 1) * bin_width)
+
+ else:
+ case_expr = x < (col_min + (this_bin + 1) * bin_width)
  out = out.when(
- x <= (col_min + (this_bin + 1) * bin_width),
+ case_expr,
  compile_ibis_types.literal_to_ibis_scalar(
  this_bin, force_dtype=pd.Int64Dtype()
  ),
@@ -375,32 +380,49 @@ def _(
  interval_struct = None
  adj = (col_max - col_min) * 0.001
  for this_bin in range(op.bins):
- left_edge = (
- col_min + this_bin * bin_width - (0 if this_bin > 0 else adj)
- )
- right_edge = col_min + (this_bin + 1) * bin_width
- interval_struct = ibis_types.struct(
- {
- "left_exclusive": left_edge,
- "right_inclusive": right_edge,
- }
- )
+ left_edge_adj = adj if this_bin == 0 and op.right else 0
+ right_edge_adj = adj if this_bin == op.bins - 1 and not op.right else 0
+
+ left_edge = col_min + this_bin * bin_width - left_edge_adj
+ right_edge = col_min + (this_bin + 1) * bin_width + right_edge_adj
+
+ if op.right:
+ interval_struct = ibis_types.struct(
+ {
+ "left_exclusive": left_edge,
+ "right_inclusive": right_edge,
+ }
+ )
+ else:
+ interval_struct = ibis_types.struct(
+ {
+ "left_inclusive": left_edge,
+ "right_exclusive": right_edge,
+ }
+ )
 
  if this_bin < op.bins - 1:
- out = out.when(
- x <= (col_min + (this_bin + 1) * bin_width),
- interval_struct,
- )
+ if op.right:
+ case_expr = x <= (col_min + (this_bin + 1) * bin_width)
+ else:
+ case_expr = x < (col_min + (this_bin + 1) * bin_width)
+ out = out.when(case_expr, interval_struct)
  else:
  out = out.when(x.notnull(), interval_struct)
  else: # Interpret as intervals
  for interval in op.bins:
  left = compile_ibis_types.literal_to_ibis_scalar(interval[0])
  right = compile_ibis_types.literal_to_ibis_scalar(interval[1])
- condition = (x > left) & (x <= right)
- interval_struct = ibis_types.struct(
- {"left_exclusive": left, "right_inclusive": right}
- )
+ if op.right:
+ condition = (x > left) & (x <= right)
+ interval_struct = ibis_types.struct(
+ {"left_exclusive": left, "right_inclusive": right}
+ )
+ else:
+ condition = (x >= left) & (x < right)
+ interval_struct = ibis_types.struct(
+ {"left_inclusive": left, "right_exclusive": right}
+ )
  out = out.when(condition, interval_struct)
  return out.end()
 
 
@@ -15,7 +15,6 @@
 from __future__ import annotations
 
 import typing
-from typing import Iterable, Optional, Union
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
@@ -33,26 +32,34 @@
 
 def cut(
  x: bigframes.series.Series,
- bins: Union[
+ bins: typing.Union[
  int,
  pd.IntervalIndex,
- Iterable,
+ typing.Iterable,
  ],
  *,
- labels: Union[Iterable[str], bool, None] = None,
+ right: typing.Optional[bool] = True,
+ labels: typing.Union[typing.Iterable[str], bool, None] = None,
 ) -> bigframes.series.Series:
  if isinstance(bins, int) and bins <= 0:
  raise ValueError("`bins` should be a positive integer.")
 
- if isinstance(bins, Iterable):
+ # TODO: Verify that `right` does not apply when `bins` is an IntervalIndex.
+
+ if isinstance(bins, typing.Iterable):
  if isinstance(bins, pd.IntervalIndex):
+ # TODO: test an empty interval index
  as_index: pd.IntervalIndex = bins
  bins = tuple((bin.left.item(), bin.right.item()) for bin in bins)
+ # To maintain consistency with pandas' behavior
+ right = True
  elif len(list(bins)) == 0:
  raise ValueError("`bins` iterable should have at least one item")
  elif isinstance(list(bins)[0], tuple):
  as_index = pd.IntervalIndex.from_tuples(list(bins))
  bins = tuple(bins)
+ # To maintain consistency with pandas' behavior
+ right = True
  elif pd.api.types.is_number(list(bins)[0]):
  bins_list = list(bins)
  if len(bins_list) < 2:
@@ -82,7 +89,8 @@ def cut(
  )
 
  return x._apply_window_op(
- agg_ops.CutOp(bins, labels=labels), window_spec=window_specs.unbound()
+ agg_ops.CutOp(bins, right=right, labels=labels),
+ window_spec=window_specs.unbound(),
  )
 
 
@@ -93,7 +101,7 @@ def qcut(
  x: bigframes.series.Series,
  q: typing.Union[int, typing.Sequence[float]],
  *,
- labels: Optional[bool] = None,
+ labels: typing.Optional[bool] = None,
  duplicates: typing.Literal["drop", "error"] = "error",
 ) -> bigframes.series.Series:
  if isinstance(q, int) and q <= 0:
 
@@ -339,6 +339,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 class CutOp(UnaryWindowOp):
  # TODO: Unintuitive, refactor into multiple ops?
  bins: typing.Union[int, Iterable]
+ right: Optional[bool]
  labels: Optional[bool]
 
  @property
@@ -355,12 +356,21 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
  if isinstance(self.bins, int)
  else dtypes.infer_literal_arrow_type(list(self.bins)[0][0])
  )
- pa_type = pa.struct(
- [
- pa.field("left_exclusive", interval_dtype, nullable=True),
- pa.field("right_inclusive", interval_dtype, nullable=True),
- ]
- )
+ if self.right:
+ pa_type = pa.struct(
+ [
+ pa.field("left_exclusive", interval_dtype, nullable=True),
+ pa.field("right_inclusive", interval_dtype, nullable=True),
+ ]
+ )
+ else:
+ pa_type = pa.struct(
+ [
+ pa.field("left_inclusive", interval_dtype, nullable=True),
+ pa.field("right_exclusive", interval_dtype, nullable=True),
+ ]
+ )
+
  return pd.ArrowDtype(pa_type)
 
  @property
 
@@ -386,33 +386,53 @@ def test_merge_series(scalars_dfs, merge_how):
 
  assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
-
-def test_cut(scalars_dfs):
+@pytest.mark.parametrize(
+ ("right"),
+ [
+ pytest.param(True),
+ pytest.param(False),
+ ],
+)
+def test_cut(scalars_dfs, right):
  scalars_df, scalars_pandas_df = scalars_dfs
 
- pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=False)
- bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=False)
+ pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=False, right=right)
+ bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=False, right=right)
 
  # make sure the result is a supported dtype
  assert bf_result.dtype == bpd.Int64Dtype()
  pd_result = pd_result.astype("Int64")
  pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
 
 
-def test_cut_default_labels(scalars_dfs):
+@pytest.mark.parametrize(
+ ("right"),
+ [
+ pytest.param(True),
+ pytest.param(False),
+ ],
+)
+def test_cut_default_labels(scalars_dfs, right):
  scalars_df, scalars_pandas_df = scalars_dfs
 
- pd_result = pd.cut(scalars_pandas_df["float64_col"], 5)
- bf_result = bpd.cut(scalars_df["float64_col"], 5).to_pandas()
+ pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, right=right)
+ bf_result = bpd.cut(scalars_df["float64_col"], 5, right=right).to_pandas()
 
  # Convert to match data format
+ pd_interval = pd_result.cat.categories[pd_result.cat.codes]
+ if pd_interval.closed == "left":
+ left_key = "left_inclusive"
+ right_key = "right_exclusive"
+ else:
+ left_key = "left_exclusive"
+ right_key = "right_inclusive"
  pd_result_converted = pd.Series(
  [
- {"left_exclusive": interval.left, "right_inclusive": interval.right}
+ {left_key: interval.left, right_key: interval.right}
  if pd.notna(val)
  else pd.NA
  for val, interval in zip(
- pd_result, pd_result.cat.categories[pd_result.cat.codes]
+ pd_result, pd_interval
  )
  ],
  name=pd_result.name,
@@ -424,27 +444,36 @@ def test_cut_default_labels(scalars_dfs):
 
 
 @pytest.mark.parametrize(
- ("breaks",),
+ ("breaks", "right"),
  [
- ([0, 5, 10, 15, 20, 100, 1000],), # ints
- ([0.5, 10.5, 15.5, 20.5, 100.5, 1000.5],), # floats
- ([0, 5, 10.5, 15.5, 20, 100, 1000.5],), # mixed
+ pytest.param([0, 5, 10, 15, 20, 100, 1000], True, id="int_right"),
+ pytest.param([0, 5, 10, 15, 20, 100, 1000], False, id="int_left"),
+ pytest.param([0.5, 10.5, 15.5, 20.5, 100.5, 1000.5], False, id="float_left"),
+ pytest.param([0, 5, 10.5, 15.5, 20, 100, 1000.5], True, id="mixed_right"),
  ],
 )
-def test_cut_numeric_breaks(scalars_dfs, breaks):
+def test_cut_numeric_breaks(scalars_dfs, breaks, right):
  scalars_df, scalars_pandas_df = scalars_dfs
 
- pd_result = pd.cut(scalars_pandas_df["float64_col"], breaks)
- bf_result = bpd.cut(scalars_df["float64_col"], breaks).to_pandas()
+ pd_result = pd.cut(scalars_pandas_df["float64_col"], breaks, right=right)
+ bf_result = bpd.cut(scalars_df["float64_col"], breaks, right=right).to_pandas()
 
  # Convert to match data format
+ pd_interval = pd_result.cat.categories[pd_result.cat.codes]
+ if pd_interval.closed == "left":
+ left_key = "left_inclusive"
+ right_key = "right_exclusive"
+ else:
+ left_key = "left_exclusive"
+ right_key = "right_inclusive"
+
  pd_result_converted = pd.Series(
  [
- {"left_exclusive": interval.left, "right_inclusive": interval.right}
+ {left_key: interval.left, right_key: interval.right}
  if pd.notna(val)
  else pd.NA
  for val, interval in zip(
- pd_result, pd_result.cat.categories[pd_result.cat.codes]
+ pd_result, pd_interval
  )
  ],
  name=pd_result.name,
@@ -476,28 +505,38 @@ def test_cut_errors(scalars_dfs, bins):
 
 
 @pytest.mark.parametrize(
- ("bins",),
+ ("bins", "right"),
  [
- ([(-5, 2), (2, 3), (-3000, -10)],),
- (pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]),),
+ pytest.param([(-5, 2), (2, 3), (-3000, -10)], True, id="tuple_right"),
+ pytest.param([(-5, 2), (2, 3), (-3000, -10)], False, id="tuple_left"),
+ pytest.param(pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]), True, id="interval_right"),
+ pytest.param(pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]), False, id="interval_left"),
  ],
 )
-def test_cut_with_interval(scalars_dfs, bins):
+def test_cut_with_interval(scalars_dfs, bins, right):
  scalars_df, scalars_pandas_df = scalars_dfs
- bf_result = bpd.cut(scalars_df["int64_too"], bins, labels=False).to_pandas()
+ bf_result = bpd.cut(scalars_df["int64_too"], bins, labels=False, right=right).to_pandas()
 
  if isinstance(bins, list):
  bins = pd.IntervalIndex.from_tuples(bins)
- pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=False)
+ pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=False, right=right)
 
  # Convert to match data format
+ pd_interval = pd_result.cat.categories[pd_result.cat.codes]
+ if pd_interval.closed == "left":
+ left_key = "left_inclusive"
+ right_key = "right_exclusive"
+ else:
+ left_key = "left_exclusive"
+ right_key = "right_inclusive"
+
  pd_result_converted = pd.Series(
  [
- {"left_exclusive": interval.left, "right_inclusive": interval.right}
+ {left_key: interval.left, right_key: interval.right}
  if pd.notna(val)
  else pd.NA
  for val, interval in zip(
- pd_result, pd_result.cat.categories[pd_result.cat.codes]
+ pd_result, pd_interval
  )
  ],
  name=pd_result.name,