13 changes: 13 additions & 0 deletions bigframes/functions/_function_client.py
@@ -77,6 +77,7 @@ def __init__(
cloud_function_service_account=None,
cloud_function_kms_key_name=None,
cloud_function_docker_repository=None,
cloud_build_service_account=None,
*,
session: Session,
):
@@ -94,6 +95,7 @@ def __init__(
self._cloud_function_service_account = cloud_function_service_account
self._cloud_function_kms_key_name = cloud_function_kms_key_name
self._cloud_function_docker_repository = cloud_function_docker_repository
self._cloud_build_service_account = cloud_build_service_account

def _create_bq_connection(self) -> None:
if self._bq_connection_manager:
@@ -452,6 +454,17 @@ def create_cloud_function(
function.build_config.docker_repository = (
self._cloud_function_docker_repository
)

if self._cloud_build_service_account:
canonical_cloud_build_service_account = (
self._cloud_build_service_account
if "/" in self._cloud_build_service_account
else f"projects/{self._gcp_project_id}/serviceAccounts/{self._cloud_build_service_account}"
)
function.build_config.service_account = (
canonical_cloud_build_service_account
)

function.service_config = functions_v2.ServiceConfig()
if memory_mib is not None:
function.service_config.available_memory = f"{memory_mib}Mi"
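The hunk above resolves the configured value to a fully qualified resource name before assigning it to `build_config.service_account`. A minimal standalone sketch of that resolution logic, using an illustrative project ID and service-account email rather than values from this change:

```python
def canonicalize_build_service_account(value: str, project_id: str) -> str:
    """Return the fully qualified Cloud Build service account resource name.

    A value containing "/" is assumed to already be fully qualified;
    otherwise it is treated as a bare email in the given project.
    """
    if "/" in value:
        return value
    return f"projects/{project_id}/serviceAccounts/{value}"


# Both forms resolve to the same canonical name (illustrative values only).
email = "builder@my-project.iam.gserviceaccount.com"
full = f"projects/my-project/serviceAccounts/{email}"
assert canonicalize_build_service_account(email, "my-project") == full
assert canonicalize_build_service_account(full, "my-project") == full
```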
12 changes: 12 additions & 0 deletions bigframes/functions/_function_session.py
@@ -263,6 +263,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
"""Decorator to turn a user defined function into a BigQuery remote function.
@@ -453,6 +454,16 @@ def remote_function(
If no setting is provided, `internal-only` will be used by default.
See for more details
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
cloud_build_service_account (str, Optional):
Service account in the fully qualified format
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
just the SERVICE_ACCOUNT_EMAIL. The latter is interpreted as
belonging to the BigQuery DataFrames session project. Cloud Build
uses this service account to build the function source code into
a deployable artifact. If not provided, the default Cloud Build
service account is used. See
https://cloud.google.com/build/docs/cloud-build-service-account
for more details.
"""
# Some defaults may be used from the session if not provided otherwise.
session = self._resolve_session(session)
@@ -599,6 +610,7 @@ def wrapper(func):
else cloud_function_service_account,
cloud_function_kms_key_name,
cloud_function_docker_repository,
cloud_build_service_account=cloud_build_service_account,
session=session, # type: ignore
)

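For reference, a sketch of how the new argument might be passed through a session-bound decorator, mirroring the system test later in this change; the project and service-account names are placeholders, not values guaranteed by this PR:

```python
import bigframes

# Placeholder project and service account; substitute your own.
session = bigframes.Session(
    context=bigframes.BigQueryOptions(project="my-project")
)

@session.remote_function(
    input_types=[int],
    output_type=int,
    reuse=False,
    cloud_function_service_account="default",
    # Fully qualified form; a bare email is also accepted and is
    # resolved against the session's project.
    cloud_build_service_account=(
        "projects/my-project/serviceAccounts/"
        "builder@my-project.iam.gserviceaccount.com"
    ),
)
def square_num(x):
    return x * x
```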
2 changes: 2 additions & 0 deletions bigframes/pandas/__init__.py
@@ -89,6 +89,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
return global_session.with_default_session(
bigframes.session.Session.remote_function,
@@ -108,6 +109,7 @@ def remote_function(
cloud_function_vpc_connector=cloud_function_vpc_connector,
cloud_function_memory_mib=cloud_function_memory_mib,
cloud_function_ingress_settings=cloud_function_ingress_settings,
cloud_build_service_account=cloud_build_service_account,
)


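A similar sketch at the `bigframes.pandas` module level, this time using the bare-email form; it assumes the module-level entry point mirrors the session method's signature and simply forwards the argument as shown above (values are placeholders):

```python
import bigframes.pandas as bpd

@bpd.remote_function(
    input_types=[int],
    output_type=int,
    reuse=False,
    cloud_function_service_account="default",
    # Bare email form; interpreted as belonging to the session's project.
    cloud_build_service_account="builder@my-project.iam.gserviceaccount.com",
)
def add_one(x):
    return x + 1
```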
12 changes: 12 additions & 0 deletions bigframes/session/__init__.py
@@ -1378,6 +1378,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1553,6 +1554,16 @@ def remote_function(
If no setting is provided, `internal-only` will be used by default.
See for more details
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
cloud_build_service_account (str, Optional):
Service account in the fully qualified format
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
just the SERVICE_ACCOUNT_EMAIL. The latter is interpreted as
belonging to the BigQuery DataFrames session project. Cloud Build
uses this service account to build the function source code into
a deployable artifact. If not provided, the default Cloud Build
service account is used. See
https://cloud.google.com/build/docs/cloud-build-service-account
for more details.
Returns:
collections.abc.Callable:
A remote function object pointing to the cloud assets created
@@ -1581,6 +1592,7 @@ def remote_function(
cloud_function_vpc_connector=cloud_function_vpc_connector,
cloud_function_memory_mib=cloud_function_memory_mib,
cloud_function_ingress_settings=cloud_function_ingress_settings,
cloud_build_service_account=cloud_build_service_account,
)

def udf(
76 changes: 75 additions & 1 deletion tests/system/large/functions/test_remote_function.py
@@ -1342,7 +1342,7 @@ def test_remote_function_via_session_custom_sa(scalars_dfs):
# For upfront convenience, the following set up has been statically created
# in the project bigframes-dev-perf via cloud console:
#
# 1. Create a service account as per
# 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# 2. Give necessary roles as per
# https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration
@@ -1395,6 +1395,80 @@ def square_num(x):
)


@pytest.mark.parametrize(
("set_build_service_account"),
[
pytest.param(
"projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
id="fully-qualified-sa",
),
pytest.param(
"bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
id="just-sa-email",
),
],
)
@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_custom_build_sa(
scalars_dfs, set_build_service_account
):
# TODO(shobs): Automate the following set-up during testing in the test project.
#
# For upfront convenience, the following set up has been statically created
# in the project bigframes-dev-perf via cloud console:
#
# 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# 2. Give "Cloud Build Service Account (roles/cloudbuild.builds.builder)" role as per
# https://cloud.google.com/build/docs/cloud-build-service-account#default_permissions_of_the_legacy_service_account
#
project = "bigframes-dev-perf"
expected_build_service_account = "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com"

rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))

try:

# TODO(shobs): Figure out why the default ingress setting
# (internal-only) does not work here
@rf_session.remote_function(
input_types=[int],
output_type=int,
reuse=False,
cloud_function_service_account="default",
cloud_build_service_account=set_build_service_account,
cloud_function_ingress_settings="all",
)
def square_num(x):
if x is None:
return x
return x * x

# assert that the GCF is created with the intended SA
gcf = rf_session.cloudfunctionsclient.get_function(
name=square_num.bigframes_cloud_function
)
assert gcf.build_config.service_account == expected_build_service_account

# assert that the function works as expected on data
scalars_df, scalars_pandas_df = scalars_dfs

bf_int64_col = scalars_df["int64_col"]
bf_result_col = bf_int64_col.apply(square_num)
bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()

pd_int64_col = scalars_pandas_df["int64_col"]
pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)

assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
finally:
# clean up the gcp assets created for the remote function
cleanup_function_assets(
square_num, rf_session.bqclient, rf_session.cloudfunctionsclient
)


def test_remote_function_throws_none_cloud_function_service_account(session):
with pytest.raises(
ValueError,