Skip to content

Commit bbd95e5

Browse files
authored
refactor: reorganize the sqlglot scalar compiler layout - part 2 (#2093)
This change follows up on #2075 by splitting unary_compiler.py and its unit test file into smaller files.
1 parent 920f381 commit bbd95e5

File tree

139 files changed

+2413
-2078
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

139 files changed

+2413
-2078
lines changed

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,18 @@
1414
from __future__ import annotations
1515

1616
from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
17+
import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401
1718
import bigframes.core.compile.sqlglot.expressions.binary_compiler # noqa: F401
18-
import bigframes.core.compile.sqlglot.expressions.unary_compiler # noqa: F401
19+
import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401
20+
import bigframes.core.compile.sqlglot.expressions.comparison_ops # noqa: F401
21+
import bigframes.core.compile.sqlglot.expressions.date_ops # noqa: F401
22+
import bigframes.core.compile.sqlglot.expressions.datetime_ops # noqa: F401
23+
import bigframes.core.compile.sqlglot.expressions.generic_ops # noqa: F401
24+
import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401
25+
import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401
26+
import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401
27+
import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401
28+
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
29+
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
1930

2031
__all__ = ["SQLGlotCompiler"]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import sqlglot
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
26+
# Module-local shorthand for the `register_unary_op` decorator exposed by
# `scalar_compiler.scalar_op_compiler`; used below to register each unary op
# compiler function.
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
28+
29+
@register_unary_op(ops.ArrayToStringOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArrayToStringOp) -> sge.Expression:
    """Compile ``ArrayToStringOp`` into an ``sge.ArrayToString`` expression.

    Args:
        expr: Typed expression for the array operand.
        op: The op instance; ``op.delimiter`` is the separator placed between
            the joined elements.

    Returns:
        An ``sge.ArrayToString`` node over ``expr`` with the delimiter as a
        string literal.
    """
    # Build a real string-literal node so the SQL generator does the quoting
    # and escaping. Interpolating the delimiter between hand-written quotes
    # produces broken SQL as soon as it contains a quote or a backslash.
    delimiter = sge.Literal.string(op.delimiter)
    return sge.ArrayToString(this=expr.expr, expression=delimiter)
32+
33+
34+
@register_unary_op(ops.ArrayIndexOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArrayIndexOp) -> sge.Expression:
    """Compile ``ArrayIndexOp`` into a bracket access on the array operand.

    The subscript is the numeric literal built from ``op.index``; the bracket
    node is created with ``safe=True`` and ``offset=False``.
    """
    position = sge.Literal.number(op.index)
    return sge.Bracket(
        this=expr.expr, expressions=[position], safe=True, offset=False
    )
42+
43+
44+
@register_unary_op(ops.ArraySliceOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
    """Compile ``ArraySliceOp`` into an array built from a filtered unnest.

    The array operand is unnested with an offset column, rows are kept when
    the offset is ``>= op.start`` (and ``< op.stop`` when a stop is given),
    and the surviving elements are wrapped back into an array.
    """
    # Offset of each element within the unnested array.
    position = sqlglot.to_identifier("slice_idx")

    lower_bound = position >= op.start
    bounds: typing.List[sge.Predicate] = (
        [lower_bound]
        if op.stop is None
        else [lower_bound, position < op.stop]
    )

    # Local alias for each unnested element.
    element = sqlglot.to_identifier("el")

    query = sge.select(element)
    source = sge.Unnest(
        expressions=[expr.expr],
        alias=sge.TableAlias(columns=[element]),
        offset=position,
    )
    query = query.from_(source).where(*bounds)

    return sge.array(query)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
# Module-local shorthand for the `register_unary_op` decorator exposed by
# `scalar_compiler.scalar_op_compiler`; used below to register each unary op
# compiler function.
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.obj_fetch_metadata_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the fetch-metadata op to a call to ``OBJ.FETCH_METADATA``."""
    operand = expr.expr
    return sge.func("OBJ.FETCH_METADATA", operand)
29+
30+
31+
@register_unary_op(ops.ObjGetAccessUrl)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``ObjGetAccessUrl`` to a call to ``OBJ.GET_ACCESS_URL``.

    Only the operand expression is passed to the function; no attribute of
    the op is consulted here.
    """
    operand = expr.expr
    return sge.func("OBJ.GET_ACCESS_URL", operand)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import pandas as pd
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
import bigframes.dtypes as dtypes
26+
27+
# Module-local shorthand for the `register_unary_op` decorator exposed by
# `scalar_compiler.scalar_op_compiler`; used below to register each unary op
# compiler function.
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
28+
29+
30+
@register_unary_op(ops.IsInOp, pass_op=True)
def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
    """Compile ``IsInOp`` into a membership test against ``op.values``.

    A candidate value is kept for the ``IN`` list only when it is not ``None``
    and its bigframes dtype either equals the operand dtype or is numeric
    while the operand is numeric too; every other candidate is skipped.

    Args:
        expr: Typed expression being tested.
        op: The op instance; reads ``op.values`` and ``op.match_nulls``.

    Returns:
        * ``expr IS NULL [OR expr IN (...)]`` when ``op.match_nulls`` is set
          and the candidates contain a null,
        * the literal ``FALSE`` when no candidate is usable,
        * ``COALESCE(expr IN (...), FALSE)`` otherwise.
    """
    is_numeric_expr = dtypes.is_numeric(expr.dtype)

    values = []
    for value in op.values:
        if value is None:
            continue
        dtype = dtypes.bigframes_type(type(value))
        if expr.dtype == dtype or (is_numeric_expr and dtypes.is_numeric(dtype)):
            values.append(sge.convert(value))

    if op.match_nulls and any(_is_null(value) for value in op.values):
        null_test = sge.Is(this=expr.expr, expression=sge.Null())
        if not values:
            # Only nulls were requested. Emitting ``expr IN ()`` with an empty
            # list is not valid SQL, so the null test alone is the answer.
            return null_test
        return null_test | sge.In(this=expr.expr, expressions=values)

    if not values:
        return sge.convert(False)

    # COALESCE turns the NULL produced by ``NULL IN (...)`` into FALSE.
    return sge.func(
        "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
    )
54+
55+
56+
# Helpers
57+
def _is_null(value) -> bool:
    """Return whether ``value`` is a missing value other than a float.

    ``pd.isna`` decides what counts as missing; anything that is a ``float``
    instance (float NaN in particular) is deliberately not reported as null,
    so it stays distinct from 'true' null values.
    """
    missing = typing.cast(bool, pd.isna(value))
    is_float = isinstance(value, float)
    return missing and not is_float
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
# Module-local shorthand for the `register_unary_op` decorator exposed by
# `scalar_compiler.scalar_op_compiler`; used below to register each unary op
# compiler function.
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.date_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the date op by wrapping the operand in an ``sge.Date`` node."""
    operand = expr.expr
    return sge.Date(this=operand)
29+
30+
31+
@register_unary_op(ops.day_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the day op by extracting the ``DAY`` part of the operand."""
    part = sge.Identifier(this="DAY")
    return sge.Extract(this=part, expression=expr.expr)
34+
35+
36+
@register_unary_op(ops.dayofweek_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the day-of-week op to a 0-based, Monday-first index.

    Args:
        expr: Typed expression for the date/timestamp operand.

    Returns:
        ``MOD(EXTRACT(DAYOFWEEK FROM expr) + 5, 7)``, i.e. Monday=0 through
        Sunday=6.
    """
    # BigQuery's EXTRACT(DAYOFWEEK ...) is 1-based with Sunday first
    # (Sunday=1 .. Saturday=7), while pandas' dayofweek is Monday=0 ..
    # Sunday=6. Subtracting 1 only fixes the base and leaves Sunday as 0, so
    # rotate the value instead: Sunday (1+5)%7 = 6, Monday (2+5)%7 = 0.
    sunday_first = sge.Extract(
        this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr
    )
    shifted = sge.Paren(this=sunday_first + sge.convert(5))
    return sge.Mod(this=shifted, expression=sge.convert(7))
42+
43+
44+
@register_unary_op(ops.dayofyear_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the day-of-year op by extracting the ``DAYOFYEAR`` part."""
    part = sge.Identifier(this="DAYOFYEAR")
    return sge.Extract(this=part, expression=expr.expr)
47+
48+
49+
@register_unary_op(ops.iso_day_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the ISO day op to a 1-based, Monday-first weekday number.

    Args:
        expr: Typed expression for the date/timestamp operand.

    Returns:
        ``MOD(EXTRACT(DAYOFWEEK FROM expr) + 5, 7) + 1``, i.e. Monday=1
        through Sunday=7.
    """
    # BigQuery's EXTRACT(DAYOFWEEK ...) numbers days Sunday=1 .. Saturday=7,
    # which is not the ISO numbering (Monday=1 .. Sunday=7). Rotate to a
    # Monday-first 0-based index, then shift to the 1-based ISO convention.
    sunday_first = sge.Extract(
        this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr
    )
    monday_first_zero_based = sge.Mod(
        this=sge.Paren(this=sunday_first + sge.convert(5)),
        expression=sge.convert(7),
    )
    return sge.Add(this=monday_first_zero_based, expression=sge.convert(1))
52+
53+
54+
@register_unary_op(ops.iso_week_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the ISO week op by extracting the ``ISOWEEK`` part."""
    part = sge.Identifier(this="ISOWEEK")
    return sge.Extract(this=part, expression=expr.expr)
57+
58+
59+
@register_unary_op(ops.iso_year_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the ISO year op by extracting the ``ISOYEAR`` part."""
    part = sge.Identifier(this="ISOYEAR")
    return sge.Extract(this=part, expression=expr.expr)
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
# Module-local shorthand for the `register_unary_op` decorator exposed by
# `scalar_compiler.scalar_op_compiler`; used below to register each unary op
# compiler function.
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.FloorDtOp, pass_op=True)
def _(expr: TypedExpr, op: ops.FloorDtOp) -> sge.Expression:
    """Compile ``FloorDtOp`` into an ``sge.TimestampTrunc`` of the operand.

    Args:
        expr: Typed expression for the timestamp operand.
        op: The op instance; ``op.freq`` selects the truncation granularity.

    Returns:
        An ``sge.TimestampTrunc`` node whose unit is derived from ``op.freq``.
    """
    # TODO: Remove this method when it is covered by ops.FloorOp
    # NOTE(review): op.freq is presumably a pandas-style frequency alias
    # ("D", "h", "min", ...) -- confirm against the op definition. Those
    # aliases are not valid TIMESTAMP_TRUNC date parts, so the known ones are
    # translated; any other value is passed through unchanged, as before.
    freq_to_unit = {
        "Y": "YEAR",
        "Q": "QUARTER",
        "M": "MONTH",
        "W": "WEEK(MONDAY)",
        "D": "DAY",
        "h": "HOUR",
        "min": "MINUTE",
        "s": "SECOND",
        "ms": "MILLISECOND",
        "us": "MICROSECOND",
    }
    unit = freq_to_unit.get(op.freq, op.freq)
    return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this=unit))
30+
31+
32+
@register_unary_op(ops.hour_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the hour op by extracting the ``HOUR`` part of the operand."""
    part = sge.Identifier(this="HOUR")
    return sge.Extract(this=part, expression=expr.expr)
35+
36+
37+
@register_unary_op(ops.minute_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the minute op by extracting the ``MINUTE`` part of the operand."""
    part = sge.Identifier(this="MINUTE")
    return sge.Extract(this=part, expression=expr.expr)
40+
41+
42+
@register_unary_op(ops.month_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the month op by extracting the ``MONTH`` part of the operand."""
    part = sge.Identifier(this="MONTH")
    return sge.Extract(this=part, expression=expr.expr)
45+
46+
47+
@register_unary_op(ops.normalize_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the normalize op by truncating the operand to ``DAY``."""
    day_unit = sge.Identifier(this="DAY")
    return sge.TimestampTrunc(this=expr.expr, unit=day_unit)
50+
51+
52+
@register_unary_op(ops.quarter_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the quarter op by extracting the ``QUARTER`` part."""
    part = sge.Identifier(this="QUARTER")
    return sge.Extract(this=part, expression=expr.expr)
55+
56+
57+
@register_unary_op(ops.second_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the second op by extracting the ``SECOND`` part of the operand."""
    part = sge.Identifier(this="SECOND")
    return sge.Extract(this=part, expression=expr.expr)
60+
61+
62+
@register_unary_op(ops.StrftimeOp, pass_op=True)
def _(expr: TypedExpr, op: ops.StrftimeOp) -> sge.Expression:
    """Compile ``StrftimeOp`` to ``FORMAT_TIMESTAMP(<format>, <expr>)``.

    The format string comes from ``op.date_format`` and is passed as the
    first argument, ahead of the operand.
    """
    # NOTE(review): FORMAT_TIMESTAMP is used regardless of the operand's
    # dtype -- confirm that is intended for every input type this op accepts.
    format_literal = sge.convert(op.date_format)
    return sge.func("FORMAT_TIMESTAMP", format_literal, expr.expr)
65+
66+
67+
@register_unary_op(ops.time_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the time op to a call to ``TIME`` on the operand."""
    operand = expr.expr
    return sge.func("TIME", operand)
70+
71+
72+
@register_unary_op(ops.ToDatetimeOp)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``ToDatetimeOp`` as ``CAST(TIMESTAMP_SECONDS(<expr>) AS DATETIME)``."""
    # NOTE(review): the op instance is not passed in, so any options it may
    # carry (e.g. a unit or a format) do not influence the result -- confirm
    # this is intended.
    as_timestamp = sge.func("TIMESTAMP_SECONDS", expr.expr)
    return sge.Cast(this=as_timestamp, to="DATETIME")
75+
76+
77+
@register_unary_op(ops.ToTimestampOp)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``ToTimestampOp`` to a call to ``TIMESTAMP_SECONDS``."""
    # NOTE(review): the op instance is not passed in, so any options it may
    # carry (e.g. a unit or a format) do not influence the result -- confirm
    # this is intended.
    operand = expr.expr
    return sge.func("TIMESTAMP_SECONDS", operand)
80+
81+
82+
@register_unary_op(ops.UnixMicros)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixMicros`` to a call to ``UNIX_MICROS`` on the operand."""
    operand = expr.expr
    return sge.func("UNIX_MICROS", operand)
85+
86+
87+
@register_unary_op(ops.UnixMillis)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixMillis`` to a call to ``UNIX_MILLIS`` on the operand."""
    operand = expr.expr
    return sge.func("UNIX_MILLIS", operand)
90+
91+
92+
@register_unary_op(ops.UnixSeconds)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixSeconds`` to a call to ``UNIX_SECONDS`` on the operand.

    Registered without ``pass_op`` because the op instance is never read,
    which matches the sibling ``UnixMicros`` / ``UnixMillis`` compilers.
    """
    return sge.func("UNIX_SECONDS", expr.expr)
95+
96+
97+
@register_unary_op(ops.year_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile the year op by extracting the ``YEAR`` part of the operand."""
    part = sge.Identifier(this="YEAR")
    return sge.Extract(this=part, expression=expr.expr)

0 commit comments

Comments
 (0)