Skip to content
Prev Previous commit
Next Next commit
more tests
  • Loading branch information
TrevorBergeron committed Oct 22, 2025
commit 36679973f307f08dc8883f21c2cbcbeb0c3d7029
20 changes: 20 additions & 0 deletions bigframes/core/compile/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,26 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
assert isinstance(op, string_ops.StrContainsRegexOp)
return input.str.contains(pattern=op.pat, literal=False)

@compile_op.register(string_ops.UpperOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``UpperOp`` into a polars expression that upper-cases strings."""
    # Registered for UpperOp above; the assert narrows the static type of `op`.
    assert isinstance(op, string_ops.UpperOp)
    uppercased = input.str.to_uppercase()
    return uppercased

@compile_op.register(string_ops.LowerOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``LowerOp`` into a polars expression that lower-cases strings."""
    # Registered for LowerOp above; the assert narrows the static type of `op`.
    assert isinstance(op, string_ops.LowerOp)
    lowercased = input.str.to_lowercase()
    return lowercased

@compile_op.register(string_ops.ArrayLenOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``ArrayLenOp`` into the polars per-row list length expression."""
    # Registered for ArrayLenOp above; the assert narrows the static type of `op`.
    assert isinstance(op, string_ops.ArrayLenOp)
    # NOTE(review): polars documents `list.len()` as returning UInt32 -- confirm
    # the result is cast to the op's declared integer dtype somewhere downstream.
    element_count = input.list.len()
    return element_count

@compile_op.register(string_ops.StrLenOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``StrLenOp`` into the polars character-count expression."""
    # Registered for StrLenOp above; the assert narrows the static type of `op`.
    assert isinstance(op, string_ops.StrLenOp)
    # Counts characters (not bytes), which is what Python's len() does for str.
    # NOTE(review): polars documents `len_chars()` as returning UInt32 -- confirm
    # the result is cast to the op's declared integer dtype somewhere downstream.
    char_count = input.str.len_chars()
    return char_count

@compile_op.register(string_ops.StartsWithOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
assert isinstance(op, string_ops.StartsWithOp)
Expand Down
21 changes: 20 additions & 1 deletion bigframes/core/compile/polars/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
generic_ops,
json_ops,
numeric_ops,
string_ops,
)
import bigframes.operations as ops

Expand Down Expand Up @@ -347,11 +348,28 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
return ops.coalesce_op.as_expr(new_isin, expression.const(False))


class LowerLenOp(op_lowering.OpLoweringRule):
    """Lowering rule that swaps the generic ``LenOp`` for a type-specific op.

    String-like operands are rewritten to ``StrLenOp`` and array-like operands
    to ``ArrayLenOp``; any other operand type is rejected.
    """

    @property
    def op(self) -> type[ops.ScalarOp]:
        """The op class this rule applies to."""
        return string_ops.LenOp

    def lower(self, expr: expression.OpExpression) -> expression.Expression:
        """Rewrite a ``LenOp`` expression according to its operand's type.

        Args:
            expr: An op expression whose op is ``LenOp``; only its first child
                is used as the operand.

        Returns:
            A ``StrLenOp`` or ``ArrayLenOp`` expression over the same operand.

        Raises:
            ValueError: If the operand is neither string-like nor array-like.
        """
        assert isinstance(expr.op, string_ops.LenOp)
        operand = expr.children[0]
        operand_type = operand.output_type

        # Guard clauses: pick the specialized op from the operand's dtype.
        if dtypes.is_string_like(operand_type):
            return string_ops.StrLenOp().as_expr(operand)
        if dtypes.is_array_like(operand_type):
            return string_ops.ArrayLenOp().as_expr(operand)
        raise ValueError(f"Unexpected type: {operand_type}")


def _coerce_comparables(
expr1: expression.Expression,
expr2: expression.Expression,
*,
bools_only: bool = False
bools_only: bool = False,
):
if bools_only:
if (
Expand Down Expand Up @@ -446,6 +464,7 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression):
LowerAsTypeRule(),
LowerInvertOp(),
LowerIsinOp(),
LowerLenOp(),
)


Expand Down
7 changes: 6 additions & 1 deletion bigframes/operations/python_op_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@
operator.sub: numeric_ops.sub_op,
operator.mul: numeric_ops.mul_op,
operator.truediv: numeric_ops.div_op,
operator.floordiv: numeric_ops.floordiv_op,
operator.mod: numeric_ops.mod_op,
operator.pow: numeric_ops.pow_op,
operator.pos: numeric_ops.pos_op,
operator.neg: numeric_ops.neg_op,
operator.abs: numeric_ops.abs_op,
operator.eq: comparison_ops.eq_null_match_op,
operator.eq: comparison_ops.eq_op,
operator.ne: comparison_ops.ne_op,
operator.gt: comparison_ops.gt_op,
operator.lt: comparison_ops.lt_op,
Expand All @@ -49,6 +50,8 @@
## math
math.log: numeric_ops.ln_op,
math.log10: numeric_ops.log10_op,
math.log1p: numeric_ops.log1p_op,
math.expm1: numeric_ops.expm1_op,
math.sin: numeric_ops.sin_op,
math.cos: numeric_ops.cos_op,
math.tan: numeric_ops.tan_op,
Expand All @@ -58,6 +61,8 @@
math.asin: numeric_ops.arcsin_op,
math.acos: numeric_ops.arccos_op,
math.atan: numeric_ops.arctan_op,
math.floor: numeric_ops.floor_op,
math.ceil: numeric_ops.ceil_op,
## str
str.upper: string_ops.upper_op,
str.lower: string_ops.lower_op,
Expand Down
17 changes: 17 additions & 0 deletions bigframes/operations/string_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,23 @@
)
len_op = LenOp()

## Specialized len ops for compile-time lowering
# StrLenOp: unary op named "strlen". Its type signature pairs the
# `is_string_like` predicate with INT_DTYPE -- presumably "string-like input,
# integer output"; confirm against op_typing.FixedOutputType.
StrLenOp = base_ops.create_unary_op(
name="strlen",
type_signature=op_typing.FixedOutputType(
dtypes.is_string_like, dtypes.INT_DTYPE, description="string-like"
),
)
# Module-level instance of StrLenOp.
str_len_op = StrLenOp()

# ArrayLenOp: unary op named "arraylen". Its type signature pairs the
# `is_array_like` predicate with INT_DTYPE -- presumably "array-like input,
# integer output"; confirm against op_typing.FixedOutputType.
ArrayLenOp = base_ops.create_unary_op(
name="arraylen",
type_signature=op_typing.FixedOutputType(
dtypes.is_array_like, dtypes.INT_DTYPE, description="array-like"
),
)
# Module-level instance of ArrayLenOp.
array_len_op = ArrayLenOp()

ReverseOp = base_ops.create_unary_op(
name="reverse", type_signature=op_typing.STRING_TRANSFORM
)
Expand Down
48 changes: 48 additions & 0 deletions tests/unit/test_series_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import datetime as dt
import json
import math
import operator
import pathlib
import re
import tempfile
Expand Down Expand Up @@ -4697,6 +4698,30 @@ def wrapped(x):
assert_series_equal(bf_result, pd_result, check_dtype=False)


@pytest.mark.parametrize(
    ("ufunc",),
    [
        pytest.param(str.upper),
        pytest.param(str.lower),
        pytest.param(len),
    ],
)
def test_series_apply_python_string_fns(scalars_dfs, ufunc):
    """``Series.apply`` with a builtin string function should match pandas."""
    bf_df, pd_df = scalars_dfs

    def null_safe(value):
        # Only str values are passed to `ufunc`; anything else maps to None.
        if isinstance(value, str):
            return ufunc(value)
        return None

    actual = bf_df["string_col"].apply(ufunc).to_pandas()
    expected = pd_df["string_col"].apply(null_safe)

    assert_series_equal(actual, expected, check_dtype=False)


@pytest.mark.parametrize(
("ufunc",),
[
Expand All @@ -4720,6 +4745,29 @@ def test_combine_series_ufunc(scalars_dfs, ufunc):
assert_series_equal(bf_result, pd_result, check_dtype=False)


@pytest.mark.parametrize(
    ("func",),
    [
        pytest.param(operator.add),
        pytest.param(operator.truediv),
    ],
    ids=[
        "add",
        "divide",
    ],
)
def test_combine_series_pyfunc(scalars_dfs, func):
    """``Series.combine`` with a plain ``operator`` function should match pandas."""
    bf_df, pd_df = scalars_dfs

    # Nulls are dropped on both sides before combining the column with itself.
    bf_values = bf_df["int64_col"].dropna()
    pd_values = pd_df["int64_col"].dropna()

    actual = bf_values.combine(bf_values, func).to_pandas()
    expected = pd_values.combine(pd_values, func)

    assert_series_equal(actual, expected, check_dtype=False)


def test_combine_scalar_ufunc(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs

Expand Down