Skip to content

Commit b39a4b7

Browse files
authored
chore: add experimental series.blob.metadata() function (#1169)
1 parent f80d705 commit b39a4b7

File tree

5 files changed

+65
-0
lines changed

5 files changed

+65
-0
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@
4242
# ln(2**(2**10)) == (2**10)*ln(2) ~= 709.78, so EXP(x) for x>709.78 will overflow.
4343
_FLOAT64_EXP_BOUND = typing.cast(ibis_types.NumericValue, ibis_types.literal(709.78))
4444

45+
# (field name, ibis dtype) pairs describing the ObjectRef struct: three string
# fields ("uri", "version", "authorizer") and one JSON field ("details").
_OBJ_REF_STRUCT_SCHEMA = (
46+
("uri", ibis_dtypes.String),
47+
("version", ibis_dtypes.String),
48+
("authorizer", ibis_dtypes.String),
49+
("details", ibis_dtypes.JSON),
50+
)
51+
# Ibis struct dtype built from the pairs above; it is used as both the argument
# and the return annotation of the OBJ.FETCH_METADATA builtin declared in this file.
_OBJ_REF_IBIS_DTYPE = ibis_dtypes.Struct.from_tuples(_OBJ_REF_STRUCT_SCHEMA)
52+
4553
# Datetime constants
4654
UNIT_TO_US_CONVERSION_FACTORS = {
4755
"W": 7 * 24 * 60 * 60 * 1000 * 1000,
@@ -1161,6 +1169,12 @@ def json_extract_string_array_op_impl(
11611169
return json_extract_string_array(json_obj=x, json_path=op.json_path)
11621170

11631171

1172+
# Blob Ops
1173+
@scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
def obj_fetch_metadata_op_impl(x: ibis_types.Value):
    """Compile ``obj_fetch_metadata_op`` into a call to the OBJ.FETCH_METADATA builtin.

    Args:
        x: Ibis value forwarded to the builtin as its ``obj_ref`` argument.

    Returns:
        The ibis expression produced by ``obj_fetch_metadata``.
    """
    metadata_expr = obj_fetch_metadata(obj_ref=x)
    return metadata_expr
1176+
1177+
11641178
### Binary Ops
11651179
def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None):
11661180
"""Wraps a binary operator to generate nulls of the expected type if either input is a null scalar."""
@@ -1832,3 +1846,8 @@ def json_extract_string_array(
18321846
@ibis.udf.scalar.builtin(name="ML.DISTANCE")
18331847
def vector_distance(vector1, vector2, type: str) -> ibis_dtypes.Float64:
18341848
"""Computes the distance between two vectors using specified type ("EUCLIDEAN", "MANHATTAN", or "COSINE")"""
1849+
1850+
1851+
@ibis.udf.scalar.builtin(name="OBJ.FETCH_METADATA")
def obj_fetch_metadata(obj_ref: _OBJ_REF_IBIS_DTYPE) -> _OBJ_REF_IBIS_DTYPE:  # type: ignore
    """Declare the ``OBJ.FETCH_METADATA`` builtin for ObjectRef structs.

    The function intentionally has no Python body: the decorator registers it
    as an ibis builtin scalar UDF under the name ``OBJ.FETCH_METADATA``.

    Args:
        obj_ref: ObjectRef struct value to fetch metadata for.

    Returns:
        A value annotated with the same ObjectRef struct dtype as the input.
    """

bigframes/operations/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ def create_binary_op(
326326
log10_op = create_unary_op(name="log10", type_signature=op_typing.UNARY_REAL_NUMERIC)
327327
log1p_op = create_unary_op(name="log1p", type_signature=op_typing.UNARY_REAL_NUMERIC)
328328
sqrt_op = create_unary_op(name="sqrt", type_signature=op_typing.UNARY_REAL_NUMERIC)
329+
## Blob Ops
330+
# Unary op named "obj_fetch_metadata" that uses the BLOB_TRANSFORM type signature.
# The scalar op compiler registers an implementation for it that calls the
# OBJ.FETCH_METADATA builtin.
obj_fetch_metadata_op = create_unary_op(
331+
name="obj_fetch_metadata", type_signature=op_typing.BLOB_TRANSFORM
332+
)
329333

330334

331335
# Parameterized unary ops

bigframes/operations/blob.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import bigframes
18+
from bigframes.operations import base
19+
import bigframes.operations as ops
20+
21+
22+
class BlobAccessor(base.SeriesMethods):
    """Experimental accessor exposing blob operations on a Series.

    The accessor can only be constructed while
    ``bigframes.options.experiments.blob`` is truthy.
    """

    def __init__(self, *args, **kwargs):
        """Build the accessor, refusing to do so when the experiment is off.

        Args:
            *args: Positional arguments forwarded to ``base.SeriesMethods``.
            **kwargs: Keyword arguments forwarded to ``base.SeriesMethods``.

        Raises:
            NotImplementedError: If ``bigframes.options.experiments.blob`` is
                not enabled.
        """
        if not bigframes.options.experiments.blob:
            # Keep the exception type callers may already catch, but explain
            # which option gates the feature instead of raising an empty error.
            raise NotImplementedError(
                "The blob accessor is experimental and is only available when "
                "bigframes.options.experiments.blob is enabled."
            )

        super().__init__(*args, **kwargs)

    def metadata(self):
        """Retrieve the ``gcs_metadata`` entry of each blob's details.

        Applies ``obj_fetch_metadata_op`` to the series, takes the ``"details"``
        field of the resulting struct, and extracts the ``$.gcs_metadata`` JSON
        path from it.

        Returns:
            The result of ``bigframes.bigquery.json_extract`` for the
            ``$.gcs_metadata`` path of the ``"details"`` field.
        """
        fetched = self._apply_unary_op(ops.obj_fetch_metadata_op)
        details_json = fetched.struct.field("details")

        # NOTE(review): imported locally rather than at module top, presumably
        # to avoid a circular import with bigframes.bigquery — confirm.
        import bigframes.bigquery as bbq

        return bbq.json_extract(details_json, "$.gcs_metadata")

bigframes/operations/type.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ def output_type(
213213
UNARY_REAL_NUMERIC = UnaryRealNumeric()
214214
BINARY_NUMERIC = BinaryNumeric()
215215
BINARY_REAL_NUMERIC = BinaryRealNumeric()
216+
# Type signature used by blob ops, built from the is_struct_like dtype predicate.
# NOTE(review): presumably accepts struct-like inputs and returns the input dtype
# unchanged — confirm against the TypePreserving implementation.
BLOB_TRANSFORM = TypePreserving(bigframes.dtypes.is_struct_like, description="blob")
216217
COMPARISON = Comparison()
217218
COERCE = CoerceCommon()
218219
LOGICAL = Logical()

bigframes/series.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import bigframes.operations as ops
5555
import bigframes.operations.aggregations as agg_ops
5656
import bigframes.operations.base
57+
import bigframes.operations.blob as blob
5758
import bigframes.operations.datetimes as dt
5859
import bigframes.operations.lists as lists
5960
import bigframes.operations.plotting as plotting
@@ -187,6 +188,10 @@ def struct(self) -> structs.StructAccessor:
187188
def list(self) -> lists.ListAccessor:
188189
return lists.ListAccessor(self._block)
189190

191+
@property
def blob(self) -> blob.BlobAccessor:
    """Accessor for experimental blob operations on this series.

    Constructing the accessor raises ``NotImplementedError`` unless
    ``bigframes.options.experiments.blob`` is enabled.
    """
    accessor = blob.BlobAccessor(self._block)
    return accessor
194+
190195
@property
191196
@validations.requires_ordering()
192197
def T(self) -> Series:

0 commit comments

Comments
 (0)