@@ -167,7 +167,23 @@ def get_remote_function_locations(bq_location):
167167
168168def _get_hash (def_ , package_requirements = None ):
169169 "Get hash (32 digits alphanumeric) of a function."
170- def_repr = cloudpickle .dumps (def_ , protocol = _pickle_protocol_version )
170+ # There is a known cell-id sensitivity of the cloudpickle serialization in
171+ # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of
172+ # this, if a cell contains a udf decorated with @remote_function, a unique
173+ # cloudpickle payload is generated every time the cell is run, creating new
174+ # cloud artifacts every time. This is slow and wasteful.
175+ # A workaround is to replace the filename in the code object with a
176+ # static value
177+ # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661.
178+ #
179+ # To respect the user code/environment let's make this modification on a
180+ # copy of the udf, not on the original udf itself.
181+ def_copy = cloudpickle .loads (cloudpickle .dumps (def_ ))
182+ def_copy .__code__ = def_copy .__code__ .replace (
183+ co_filename = "bigframes_place_holder_filename"
184+ )
185+
186+ def_repr = cloudpickle .dumps (def_copy , protocol = _pickle_protocol_version )
171187 if package_requirements :
172188 for p in sorted (package_requirements ):
173189 def_repr += p .encode ()
@@ -877,11 +893,16 @@ def remote_function(
877893 dynamically using the `bigquery_connection_client` assuming the user has necessary
878894 privileges. The PROJECT_ID should be the same as the BigQuery connection project.
879895 reuse (bool, Optional):
880- Reuse the remote function if is already exists.
881- `True` by default, which results in reusing an existing remote
896+ Reuse the remote function if it already exists.
897+ `True` by default, which will result in reusing an existing remote
882898 function and corresponding cloud function (if any) that was
883899 previously created for the same udf.
884- Setting it to `False` forces the creation of a unique remote function.
900+ Please note that for an unnamed (i.e. created without an explicit
901+ `name` argument) remote function, the BigQuery DataFrames
902+ session id is included in the cloud artifact names. So for
903+ effective reuse across sessions it is recommended to create
904+ the remote function with an explicit `name`.
905+ Setting it to `False` would force creating a unique remote function.
885906 If the required remote function does not exist then it would be
886907 created irrespective of this param.
887908 name (str, Optional):
0 commit comments