Skip to content

Commit 8ca17e6

Browse files
Merge remote-tracking branch 'github/main' into only_once
2 parents 7541289 + 37f8c32 commit 8ca17e6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1339
-220
lines changed

CHANGELOG.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,35 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [1.26.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.25.0...v1.26.0) (2024-11-12)
8+
9+
10+
### Features
11+
12+
* Add basic geopandas functionality ([#962](https://github.com/googleapis/python-bigquery-dataframes/issues/962)) ([3759c63](https://github.com/googleapis/python-bigquery-dataframes/commit/3759c6397eaa3c46c4142aa51ca22be3dc8e4971))
13+
* Support `json_extract_string_array` in the `bigquery` module ([#1131](https://github.com/googleapis/python-bigquery-dataframes/issues/1131)) ([4ef8bac](https://github.com/googleapis/python-bigquery-dataframes/commit/4ef8bacdcc5447ba53c0f354526346f4dec7c5a1))
14+
15+
16+
### Bug Fixes
17+
18+
* Fix Series.to_frame generating string label instead of int where name is None ([#1118](https://github.com/googleapis/python-bigquery-dataframes/issues/1118)) ([14e32b5](https://github.com/googleapis/python-bigquery-dataframes/commit/14e32b51c11c1718128f49ef94e754afc0ac0618))
19+
* Update the API documentation with newly added rep ([#1120](https://github.com/googleapis/python-bigquery-dataframes/issues/1120)) ([72c228b](https://github.com/googleapis/python-bigquery-dataframes/commit/72c228b15627e6047d60ae42740563a6dfea73da))
20+
21+
22+
### Performance Improvements
23+
24+
* Reduce CURRENT_TIMESTAMP queries ([#1114](https://github.com/googleapis/python-bigquery-dataframes/issues/1114)) ([32274b1](https://github.com/googleapis/python-bigquery-dataframes/commit/32274b130849b37d7e587643cf7b6d109455ff38))
25+
* Reduce dry runs from read_gbq with table ([#1129](https://github.com/googleapis/python-bigquery-dataframes/issues/1129)) ([f7e4354](https://github.com/googleapis/python-bigquery-dataframes/commit/f7e435488d630cf4cf493c89ecdde94a95a7a0d7))
26+
27+
28+
### Documentation
29+
30+
* Add file for Classification with a Boosted Tree Model and snippet for preparing sample data ([#1135](https://github.com/googleapis/python-bigquery-dataframes/issues/1135)) ([7ac6639](https://github.com/googleapis/python-bigquery-dataframes/commit/7ac6639fb0e8baf5fb3adf5785dffd8cf9b06702))
31+
* Add snippet for Linear Regression tutorial Predict Outcomes section ([#1101](https://github.com/googleapis/python-bigquery-dataframes/issues/1101)) ([108f4a9](https://github.com/googleapis/python-bigquery-dataframes/commit/108f4a98463596d8df6d381b3580eb72eab41b6e))
32+
* Update `DataFrame` docstrings to include the errors section ([#1127](https://github.com/googleapis/python-bigquery-dataframes/issues/1127)) ([a38d4c4](https://github.com/googleapis/python-bigquery-dataframes/commit/a38d4c422b6b312f6a54d7b1dd105a474ec2e91a))
33+
* Update GroupBy docstrings ([#1103](https://github.com/googleapis/python-bigquery-dataframes/issues/1103)) ([9867a78](https://github.com/googleapis/python-bigquery-dataframes/commit/9867a788e7c46bf0850cacbe7cd41a11fea32d6b))
34+
* Update Session docstrings to include exceptions ([#1130](https://github.com/googleapis/python-bigquery-dataframes/issues/1130)) ([a870421](https://github.com/googleapis/python-bigquery-dataframes/commit/a87042158b181dceee31124fe208926a3bb1071f))
35+
736
## [1.25.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.24.0...v1.25.0) (2024-10-29)
837

938

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from bigframes.bigquery._operations.json import (
2626
json_extract,
2727
json_extract_array,
28+
json_extract_string_array,
2829
json_set,
2930
)
3031
from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -37,6 +38,7 @@
3738
"json_set",
3839
"json_extract",
3940
"json_extract_array",
41+
"json_extract_string_array",
4042
"approx_top_count",
4143
"struct",
4244
"create_vector_index",

bigframes/bigquery/_operations/json.py

Lines changed: 104 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,17 @@
2121

2222
from __future__ import annotations
2323

24-
from typing import Any, Sequence, Tuple
24+
from typing import Any, cast, Optional, Sequence, Tuple, Union
2525

26+
import bigframes.dtypes
2627
import bigframes.operations as ops
2728
import bigframes.series as series
2829

30+
from . import array
31+
2932

3033
def json_set(
31-
series: series.Series,
34+
input: series.Series,
3235
json_path_value_pairs: Sequence[Tuple[str, Any]],
3336
) -> series.Series:
3437
"""Produces a new JSON value within a Series by inserting or replacing values at
@@ -47,7 +50,7 @@ def json_set(
4750
Name: data, dtype: string
4851
4952
Args:
50-
series (bigframes.series.Series):
53+
input (bigframes.series.Series):
5154
The Series containing JSON data (as native JSON objects or JSON-formatted strings).
5255
json_path_value_pairs (Sequence[Tuple[str, Any]]):
5356
Pairs of JSON path and the new value to insert/replace.
@@ -59,6 +62,7 @@ def json_set(
5962
# SQLGlot parser does not support the "create_if_missing => true" syntax, so
6063
# create_if_missing is not currently implemented.
6164

65+
result = input
6266
for json_path_value_pair in json_path_value_pairs:
6367
if len(json_path_value_pair) != 2:
6468
raise ValueError(
@@ -67,14 +71,14 @@ def json_set(
6771
)
6872

6973
json_path, json_value = json_path_value_pair
70-
series = series._apply_binary_op(
74+
result = result._apply_binary_op(
7175
json_value, ops.JSONSet(json_path=json_path), alignment="left"
7276
)
73-
return series
77+
return result
7478

7579

7680
def json_extract(
77-
series: series.Series,
81+
input: series.Series,
7882
json_path: str,
7983
) -> series.Series:
8084
"""Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON`
@@ -93,24 +97,24 @@ def json_extract(
9397
dtype: string
9498
9599
Args:
96-
series (bigframes.series.Series):
100+
input (bigframes.series.Series):
97101
The Series containing JSON data (as native JSON objects or JSON-formatted strings).
98102
json_path (str):
99103
The JSON path identifying the data that you want to obtain from the input.
100104
101105
Returns:
102106
bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
103107
"""
104-
return series._apply_unary_op(ops.JSONExtract(json_path=json_path))
108+
return input._apply_unary_op(ops.JSONExtract(json_path=json_path))
105109

106110

107111
def json_extract_array(
108-
series: series.Series,
112+
input: series.Series,
109113
json_path: str = "$",
110114
) -> series.Series:
111-
"""Extracts a JSON array and converts it to a SQL array of JSON-formatted `STRING` or `JSON`
112-
values. This function uses single quotes and brackets to escape invalid JSONPath
113-
characters in JSON keys.
115+
"""Extracts a JSON array and converts it to a SQL array of JSON-formatted
116+
`STRING` or `JSON` values. This function uses single quotes and brackets to
117+
escape invalid JSONPath characters in JSON keys.
114118
115119
**Examples:**
116120
@@ -124,13 +128,98 @@ def json_extract_array(
124128
1 ['4' '5']
125129
dtype: list<item: string>[pyarrow]
126130
131+
>>> s = bpd.Series([
132+
... '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}',
133+
... '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}'
134+
... ])
135+
>>> bbq.json_extract_array(s, "$.fruits")
136+
0 ['{"name":"apple"}' '{"name":"cherry"}']
137+
1 ['{"name":"guava"}' '{"name":"grapes"}']
138+
dtype: list<item: string>[pyarrow]
139+
140+
>>> s = bpd.Series([
141+
... '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
142+
... '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
143+
... ])
144+
>>> bbq.json_extract_array(s, "$.fruits.names")
145+
0 ['"apple"' '"cherry"']
146+
1 ['"guava"' '"grapes"']
147+
dtype: list<item: string>[pyarrow]
148+
127149
Args:
128-
series (bigframes.series.Series):
150+
input (bigframes.series.Series):
129151
The Series containing JSON data (as native JSON objects or JSON-formatted strings).
130152
json_path (str):
131153
The JSON path identifying the data that you want to obtain from the input.
132154
133155
Returns:
134-
bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
156+
bigframes.series.Series: A new Series with the parsed arrays from the input.
135157
"""
136-
return series._apply_unary_op(ops.JSONExtractArray(json_path=json_path))
158+
return input._apply_unary_op(ops.JSONExtractArray(json_path=json_path))
159+
160+
161+
def json_extract_string_array(
162+
input: series.Series,
163+
json_path: str = "$",
164+
value_dtype: Optional[
165+
Union[bigframes.dtypes.Dtype, bigframes.dtypes.DtypeString]
166+
] = None,
167+
) -> series.Series:
168+
"""Extracts a JSON array and converts it to a SQL array of `STRING` values.
169+
A `value_dtype` can be provided to further coerce the data type of the
170+
values in the array. This function uses single quotes and brackets to escape
171+
invalid JSONPath characters in JSON keys.
172+
173+
**Examples:**
174+
175+
>>> import bigframes.pandas as bpd
176+
>>> import bigframes.bigquery as bbq
177+
>>> bpd.options.display.progress_bar = None
178+
179+
>>> s = bpd.Series(['[1, 2, 3]', '[4, 5]'])
180+
>>> bbq.json_extract_string_array(s)
181+
0 ['1' '2' '3']
182+
1 ['4' '5']
183+
dtype: list<item: string>[pyarrow]
184+
185+
>>> bbq.json_extract_string_array(s, value_dtype='Int64')
186+
0 [1 2 3]
187+
1 [4 5]
188+
dtype: list<item: int64>[pyarrow]
189+
190+
>>> s = bpd.Series([
191+
... '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
192+
... '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
193+
... ])
194+
>>> bbq.json_extract_string_array(s, "$.fruits.names")
195+
0 ['apple' 'cherry']
196+
1 ['guava' 'grapes']
197+
dtype: list<item: string>[pyarrow]
198+
199+
Args:
200+
input (bigframes.series.Series):
201+
The Series containing JSON data (as native JSON objects or JSON-formatted strings).
202+
json_path (str):
203+
The JSON path identifying the data that you want to obtain from the input.
204+
value_dtype (dtype, Optional):
205+
The data type supported by BigFrames DataFrame.
206+
207+
Returns:
208+
bigframes.series.Series: A new Series with the parsed arrays from the input.
209+
"""
210+
array_series = input._apply_unary_op(
211+
ops.JSONExtractStringArray(json_path=json_path)
212+
)
213+
if value_dtype not in [None, bigframes.dtypes.STRING_DTYPE]:
214+
array_items_series = array_series.explode()
215+
if value_dtype == bigframes.dtypes.BOOL_DTYPE:
216+
array_items_series = array_items_series.str.lower() == "true"
217+
else:
218+
array_items_series = array_items_series.astype(value_dtype)
219+
array_series = cast(
220+
series.Series,
221+
array.array_agg(
222+
array_items_series.groupby(level=input.index.names, dropna=False)
223+
),
224+
)
225+
return array_series

bigframes/core/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,13 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]:
268268
def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
269269
"""Append together multiple ArrayValue objects."""
270270
return ArrayValue(
271-
nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]]))
271+
nodes.ConcatNode(
272+
children=tuple([self.node, *[val.node for val in other]]),
273+
output_ids=tuple(
274+
ids.ColumnId(bigframes.core.guid.generate_guid())
275+
for id in self.column_ids
276+
),
277+
)
272278
)
273279

274280
def compute_values(self, assignments: Sequence[ex.Expression]):

bigframes/core/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3137,7 +3137,7 @@ def _pd_index_to_array_value(
31373137
rows = []
31383138
labels_as_tuples = utils.index_as_tuples(index)
31393139
for row_offset in range(len(index)):
3140-
id_gen = bigframes.core.identifiers.standard_identifiers()
3140+
id_gen = bigframes.core.identifiers.standard_id_strings()
31413141
row_label = labels_as_tuples[row_offset]
31423142
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
31433143
row = {}

bigframes/core/compile/api.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@
1818
import google.cloud.bigquery as bigquery
1919

2020
import bigframes.core.compile.compiler as compiler
21-
import bigframes.core.rewrite as rewrites
2221

2322
if TYPE_CHECKING:
2423
import bigframes.core.nodes
2524
import bigframes.core.ordering
2625
import bigframes.core.schema
2726

28-
_STRICT_COMPILER = compiler.Compiler(strict=True)
27+
_STRICT_COMPILER = compiler.Compiler(
28+
strict=True, enable_pruning=True, enable_densify_ids=True
29+
)
2930

3031

3132
class SQLCompiler:
@@ -34,7 +35,7 @@ def __init__(self, strict: bool = True):
3435

3536
def compile_peek(self, node: bigframes.core.nodes.BigFrameNode, n_rows: int) -> str:
3637
"""Compile node into sql that selects N arbitrary rows, may not execute deterministically."""
37-
return self._compiler.compile_unordered_ir(node).peek_sql(n_rows)
38+
return self._compiler.compile_peek_sql(node, n_rows)
3839

3940
def compile_unordered(
4041
self,
@@ -44,9 +45,8 @@ def compile_unordered(
4445
) -> str:
4546
"""Compile node into sql where rows are unsorted, and no ordering information is preserved."""
4647
# TODO: Enable limit pullup, but only if not being used to write to clustered table.
47-
return self._compiler.compile_unordered_ir(node).to_sql(
48-
col_id_overrides=col_id_overrides
49-
)
48+
output_ids = [col_id_overrides.get(id, id) for id in node.schema.names]
49+
return self._compiler.compile_sql(node, ordered=False, output_ids=output_ids)
5050

5151
def compile_ordered(
5252
self,
@@ -56,10 +56,8 @@ def compile_ordered(
5656
) -> str:
5757
"""Compile node into sql where rows are sorted with ORDER BY."""
5858
# If we are ordering the query anyways, compiling the slice as a limit is probably a good idea.
59-
new_node, limit = rewrites.pullup_limit_from_slice(node)
60-
return self._compiler.compile_ordered_ir(new_node).to_sql(
61-
col_id_overrides=col_id_overrides, ordered=True, limit=limit
62-
)
59+
output_ids = [col_id_overrides.get(id, id) for id in node.schema.names]
60+
return self._compiler.compile_sql(node, ordered=True, output_ids=output_ids)
6361

6462
def compile_raw(
6563
self,
@@ -68,13 +66,12 @@ def compile_raw(
6866
str, Sequence[bigquery.SchemaField], bigframes.core.ordering.RowOrdering
6967
]:
7068
"""Compile node into sql that exposes all columns, including hidden ordering-only columns."""
71-
ir = self._compiler.compile_ordered_ir(node)
72-
sql, schema = ir.raw_sql_and_schema()
73-
return sql, schema, ir._ordering
69+
return self._compiler.compile_raw(node)
7470

7571

7672
def test_only_try_evaluate(node: bigframes.core.nodes.BigFrameNode):
7773
"""Use only for unit testing paths - not fully featured. Will throw exception if fails."""
74+
node = _STRICT_COMPILER._preprocess(node)
7875
ibis = _STRICT_COMPILER.compile_ordered_ir(node)._to_ibis_expr(
7976
ordering_mode="unordered"
8077
)
@@ -85,9 +82,10 @@ def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode):
8582
"""Use only for testing paths to ensure ibis inferred schema does not diverge from bigframes inferred schema."""
8683
import bigframes.core.schema
8784

85+
node = _STRICT_COMPILER._preprocess(node)
8886
compiled = _STRICT_COMPILER.compile_unordered_ir(node)
8987
items = tuple(
90-
bigframes.core.schema.SchemaItem(id, compiled.get_column_type(id))
91-
for id in compiled.column_ids
88+
bigframes.core.schema.SchemaItem(name, compiled.get_column_type(ibis_id))
89+
for name, ibis_id in zip(node.schema.names, compiled.column_ids)
9290
)
9391
return bigframes.core.schema.ArraySchema(items)

0 commit comments

Comments
 (0)