googleapis · tswast · Jun 18, 2025 · Jun 18, 2025 · Jun 18, 2025 · Jun 18, 2025
@@ -303,6 +303,7 @@ def get_runtime_json_str(
  def exif(
  self,
  *,
+ engine: Literal[None, "pillow"] = None,
  connection: Optional[str] = None,
  max_batching_rows: int = 8192,
  container_cpu: Union[float, int] = 0.33,
@@ -311,6 +312,7 @@ def exif(
  """Extract EXIF data. Currently only image types are supported.
 
  Args:
+ engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
  max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
  container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
@@ -319,6 +321,8 @@ def exif(
  Returns:
  bigframes.series.Series: JSON series of key-value pairs.
  """
+ if engine is None or engine.casefold() != "pillow":
+ raise ValueError("Must specify the engine, supported value is 'pillow'.")
 
  import bigframes.bigquery as bbq
  import bigframes.blob._functions as blob_func
@@ -344,6 +348,7 @@ def image_blur(
  self,
  ksize: tuple[int, int],
  *,
+ engine: Literal[None, "opencv"] = None,
  dst: Optional[Union[str, bigframes.series.Series]] = None,
  connection: Optional[str] = None,
  max_batching_rows: int = 8192,
@@ -354,6 +359,7 @@ def image_blur(
 
  Args:
  ksize (tuple(int, int)): Kernel size.
+ engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
  str: GCS folder str. The output filenames are the same as the input files.
  blob Series: The output file paths are determined by the uris of the blob Series.
@@ -367,6 +373,9 @@ def image_blur(
  Returns:
  bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
  """
+ if engine is None or engine.casefold() != "opencv":
+ raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
  import bigframes.blob._functions as blob_func
 
  connection = self._resolve_connection(connection)
@@ -424,6 +433,7 @@ def image_resize(
  self,
  dsize: tuple[int, int] = (0, 0),
  *,
+ engine: Literal[None, "opencv"] = None,
  fx: float = 0.0,
  fy: float = 0.0,
  dst: Optional[Union[str, bigframes.series.Series]] = None,
@@ -436,6 +446,7 @@ def image_resize(
 
  Args:
  dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size.
+ engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size.
  fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
  dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
@@ -451,6 +462,9 @@ def image_resize(
  Returns:
  bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
  """
+ if engine is None or engine.casefold() != "opencv":
+ raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
  dsize_set = dsize[0] > 0 and dsize[1] > 0
  fsize_set = fx > 0.0 and fy > 0.0
  if not dsize_set ^ fsize_set:
@@ -516,6 +530,7 @@ def image_resize(
  def image_normalize(
  self,
  *,
+ engine: Literal[None, "opencv"] = None,
  alpha: float = 1.0,
  beta: float = 0.0,
  norm_type: str = "l2",
@@ -528,6 +543,7 @@ def image_normalize(
  """Normalize images.
 
  Args:
+ engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization.
  beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization.
  norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
@@ -544,6 +560,9 @@ def image_normalize(
  Returns:
  bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
  """
+ if engine is None or engine.casefold() != "opencv":
+ raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
  import bigframes.blob._functions as blob_func
 
  connection = self._resolve_connection(connection)
@@ -604,6 +623,7 @@ def image_normalize(
  def pdf_extract(
  self,
  *,
+ engine: Literal[None, "pypdf"] = None,
  connection: Optional[str] = None,
  max_batching_rows: int = 1,
  container_cpu: Union[float, int] = 2,
@@ -613,6 +633,7 @@ def pdf_extract(
  """Extracts text from PDF URLs and saves the text as string.
 
  Args:
+ engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  connection (str or None, default None): BQ connection used for
  function internet transactions, and the output blob if "dst"
  is str. If None, uses default connection of the session.
@@ -631,6 +652,9 @@ def pdf_extract(
  Contains the extracted text from the PDF file.
  Includes error messages if verbosity is enabled.
  """
+ if engine is None or engine.casefold() != "pypdf":
+ raise ValueError("Must specify the engine, supported value is 'pypdf'.")
+
  import bigframes.bigquery as bbq
  import bigframes.blob._functions as blob_func
  import bigframes.pandas as bpd
@@ -663,6 +687,7 @@ def pdf_extract(
  def pdf_chunk(
  self,
  *,
+ engine: Literal[None, "pypdf"] = None,
  connection: Optional[str] = None,
  chunk_size: int = 2000,
  overlap_size: int = 200,
@@ -675,6 +700,7 @@ def pdf_chunk(
  arrays of strings.
 
  Args:
+ engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
  connection (str or None, default None): BQ connection used for
  function internet transactions, and the output blob if "dst"
  is str. If None, uses default connection of the session.
@@ -698,6 +724,8 @@ def pdf_chunk(
  where each string is a chunk of text extracted from PDF.
  Includes error messages if verbosity is enabled.
  """
+ if engine is None or engine.casefold() != "pypdf":
+ raise ValueError("Must specify the engine, supported value is 'pypdf'.")
 
  import bigframes.bigquery as bbq
  import bigframes.blob._functions as blob_func
@@ -740,6 +768,7 @@ def pdf_chunk(
  def audio_transcribe(
  self,
  *,
+ engine: Literal["bigquery"] = "bigquery",
  connection: Optional[str] = None,
  model_name: Optional[
  Literal[
@@ -753,6 +782,7 @@ def audio_transcribe(
  Transcribe audio content using a Gemini multimodal model.
 
  Args:
+ engine ('bigquery'): The engine (bigquery or third party library) used for the function.
  connection (str or None, default None): BQ connection used for
  function internet transactions, and the output blob if "dst"
  is str. If None, uses default connection of the session.
@@ -770,6 +800,9 @@ def audio_transcribe(
  Contains the transcribed text from the audio file.
  Includes error messages if verbosity is enabled.
  """
+ if engine.casefold() != "bigquery":
+ raise ValueError("Must specify the engine, supported value is 'bigquery'.")
+
  import bigframes.bigquery as bbq
  import bigframes.ml.llm as llm
  import bigframes.pandas as bpd

@@ -254,16 +254,17 @@
  "outputs": [],
  "source": [
  "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n",
- " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\"\n",
+ " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n",
  ")\n",
  "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n",
- " (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\"\n",
+ " (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n",
  ")\n",
  "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n",
  " alpha=50.0,\n",
  " beta=150.0,\n",
  " norm_type=\"minmax\",\n",
  " dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n",
+ " engine=\"opencv\",\n",
  ")"
  ]
  },
@@ -280,7 +281,7 @@
  "outputs": [],
  "source": [
  "# You can also chain functions together\n",
- "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\")"
+ "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")"
  ]
  },
  {
@@ -419,7 +420,7 @@
  },
  "outputs": [],
  "source": [
- "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk()"
+ "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk(engine=\"pypdf\")"
  ]
  },
  {

@@ -56,21 +56,22 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
 
  # [START bigquery_dataframes_multimodal_dataframe_image_transform]
  df_image["blurred"] = df_image["image"].blob.image_blur(
- (20, 20), dst=f"{dst_bucket}/image_blur_transformed/"
+ (20, 20), dst=f"{dst_bucket}/image_blur_transformed/", engine="opencv"
  )
  df_image["resized"] = df_image["image"].blob.image_resize(
- (300, 200), dst=f"{dst_bucket}/image_resize_transformed/"
+ (300, 200), dst=f"{dst_bucket}/image_resize_transformed/", engine="opencv"
  )
  df_image["normalized"] = df_image["image"].blob.image_normalize(
  alpha=50.0,
  beta=150.0,
  norm_type="minmax",
  dst=f"{dst_bucket}/image_normalize_transformed/",
+ engine="opencv",
  )
 
  # You can also chain functions together
  df_image["blur_resized"] = df_image["blurred"].blob.image_resize(
- (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/"
+ (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/", engine="opencv"
  )
  df_image
  # [END bigquery_dataframes_multimodal_dataframe_image_transform]
@@ -113,7 +114,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
  df_pdf = bpd.from_glob_path(
  "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
  )
- df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk()
+ df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk(engine="pypdf")
  chunked = df_pdf["chunked"].explode()
  chunked
  # [END bigquery_dataframes_multimodal_dataframe_pdf_chunk]

@@ -61,7 +61,7 @@ def test_blob_exif(
  connection=bq_connection,
  )
 
- actual = exif_image_df["blob_col"].blob.exif(connection=bq_connection)
+ actual = exif_image_df["blob_col"].blob.exif(engine="pillow", connection=bq_connection)
  expected = bpd.Series(
  ['{"ExifOffset": 47, "Make": "MyCamera"}'],
  session=session,
@@ -86,7 +86,7 @@ def test_blob_image_blur_to_series(
  )
 
  actual = images_mm_df["blob_col"].blob.image_blur(
- (8, 8), dst=series, connection=bq_connection
+ (8, 8), dst=series, connection=bq_connection, engine="opencv"
  )
  expected_df = pd.DataFrame(
  {
@@ -114,7 +114,7 @@ def test_blob_image_blur_to_folder(
  images_output_uris: list[str],
 ):
  actual = images_mm_df["blob_col"].blob.image_blur(
- (8, 8), dst=images_output_folder, connection=bq_connection
+ (8, 8), dst=images_output_folder, connection=bq_connection, engine="opencv"
  )
  expected_df = pd.DataFrame(
  {
@@ -136,7 +136,9 @@ def test_blob_image_blur_to_folder(
 
 
 def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
- actual = images_mm_df["blob_col"].blob.image_blur((8, 8), connection=bq_connection)
+ actual = images_mm_df["blob_col"].blob.image_blur(
+ (8, 8), connection=bq_connection, engine="opencv"
+ )
 
  assert isinstance(actual, bpd.Series)
  assert len(actual) == 2
@@ -154,7 +156,7 @@ def test_blob_image_resize_to_series(
  )
 
  actual = images_mm_df["blob_col"].blob.image_resize(
- (200, 300), dst=series, connection=bq_connection
+ (200, 300), dst=series, connection=bq_connection, engine="opencv"
  )
  expected_df = pd.DataFrame(
  {
@@ -182,7 +184,7 @@ def test_blob_image_resize_to_folder(
  images_output_uris: list[str],
 ):
  actual = images_mm_df["blob_col"].blob.image_resize(
- (200, 300), dst=images_output_folder, connection=bq_connection
+ (200, 300), dst=images_output_folder, connection=bq_connection, engine="opencv"
  )
  expected_df = pd.DataFrame(
  {
@@ -205,7 +207,7 @@ def test_blob_image_resize_to_folder(
 
 def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
  actual = images_mm_df["blob_col"].blob.image_resize(
- (200, 300), connection=bq_connection
+ (200, 300), connection=bq_connection, engine="opencv"
  )
 
  assert isinstance(actual, bpd.Series)
@@ -224,7 +226,12 @@ def test_blob_image_normalize_to_series(
  )
 
  actual = images_mm_df["blob_col"].blob.image_normalize(
- alpha=50.0, beta=150.0, norm_type="minmax", dst=series, connection=bq_connection
+ alpha=50.0,
+ beta=150.0,
+ norm_type="minmax",
+ dst=series,
+ connection=bq_connection,
+ engine="opencv",
  )
  expected_df = pd.DataFrame(
  {
@@ -257,6 +264,7 @@ def test_blob_image_normalize_to_folder(
  norm_type="minmax",
  dst=images_output_folder,
  connection=bq_connection,
+ engine="opencv",
  )
  expected_df = pd.DataFrame(
  {
@@ -279,7 +287,11 @@ def test_blob_image_normalize_to_folder(
 
 def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
  actual = images_mm_df["blob_col"].blob.image_normalize(
- alpha=50.0, beta=150.0, norm_type="minmax", connection=bq_connection
+ alpha=50.0,
+ beta=150.0,
+ norm_type="minmax",
+ connection=bq_connection,
+ engine="opencv",
  )
 
  assert isinstance(actual, bpd.Series)
@@ -322,7 +334,7 @@ def test_blob_pdf_extract(
 ):
  actual = (
  pdf_mm_df["pdf"]
- .blob.pdf_extract(connection=bq_connection, verbose=verbose)
+ .blob.pdf_extract(connection=bq_connection, verbose=verbose, engine="pypdf")
  .explode()
  .to_pandas()
  )
@@ -373,7 +385,11 @@ def test_blob_pdf_chunk(
  actual = (
  pdf_mm_df["pdf"]
  .blob.pdf_chunk(
- connection=bq_connection, chunk_size=50, overlap_size=10, verbose=verbose
+ connection=bq_connection,
+ chunk_size=50,
+ overlap_size=10,
+ verbose=verbose,
+ engine="pypdf",
  )
  .explode()
  .to_pandas()