2121
2222from __future__ import annotations
2323
24- from typing import Any , Sequence , Tuple
24+ from typing import Any , cast , Optional , Sequence , Tuple , Union
2525
26+ import bigframes .dtypes
2627import bigframes .operations as ops
2728import bigframes .series as series
2829
30+ from . import array
31+
2932
3033def json_set (
31- series : series .Series ,
34+ input : series .Series ,
3235 json_path_value_pairs : Sequence [Tuple [str , Any ]],
3336) -> series .Series :
3437 """Produces a new JSON value within a Series by inserting or replacing values at
@@ -47,7 +50,7 @@ def json_set(
4750 Name: data, dtype: string
4851
4952 Args:
50- series (bigframes.series.Series):
53+ input (bigframes.series.Series):
5154 The Series containing JSON data (as native JSON objects or JSON-formatted strings).
5255 json_path_value_pairs (Sequence[Tuple[str, Any]]):
5356 Pairs of JSON path and the new value to insert/replace.
@@ -59,6 +62,7 @@ def json_set(
5962 # SQLGlot parser does not support the "create_if_missing => true" syntax, so
6063 # create_if_missing is not currently implemented.
6164
65+ result = input
6266 for json_path_value_pair in json_path_value_pairs :
6367 if len (json_path_value_pair ) != 2 :
6468 raise ValueError (
@@ -67,14 +71,14 @@ def json_set(
6771 )
6872
6973 json_path , json_value = json_path_value_pair
70- series = series ._apply_binary_op (
74+ result = result ._apply_binary_op (
7175 json_value , ops .JSONSet (json_path = json_path ), alignment = "left"
7276 )
73- return series
77+ return result
7478
7579
7680def json_extract (
77- series : series .Series ,
81+ input : series .Series ,
7882 json_path : str ,
7983) -> series .Series :
8084 """Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON`
@@ -93,24 +97,24 @@ def json_extract(
9397 dtype: string
9498
9599 Args:
96- series (bigframes.series.Series):
100+ input (bigframes.series.Series):
97101 The Series containing JSON data (as native JSON objects or JSON-formatted strings).
98102 json_path (str):
99103 The JSON path identifying the data that you want to obtain from the input.
100104
101105 Returns:
102106 bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
103107 """
104- return series ._apply_unary_op (ops .JSONExtract (json_path = json_path ))
108+ return input ._apply_unary_op (ops .JSONExtract (json_path = json_path ))
105109
106110
107111def json_extract_array (
108- series : series .Series ,
112+ input : series .Series ,
109113 json_path : str = "$" ,
110114) -> series .Series :
111- """Extracts a JSON array and converts it to a SQL array of JSON-formatted `STRING` or `JSON`
112- values. This function uses single quotes and brackets to escape invalid JSONPath
113- characters in JSON keys.
115+ """Extracts a JSON array and converts it to a SQL array of JSON-formatted
116+ `STRING` or `JSON` values. This function uses single quotes and brackets to
117+ escape invalid JSONPath characters in JSON keys.
114118
115119 **Examples:**
116120
@@ -124,13 +128,98 @@ def json_extract_array(
124128 1 ['4' '5']
125129 dtype: list<item: string>[pyarrow]
126130
131+ >>> s = bpd.Series([
132+ ... '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}',
133+ ... '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}'
134+ ... ])
135+ >>> bbq.json_extract_array(s, "$.fruits")
136+ 0 ['{"name":"apple"}' '{"name":"cherry"}']
137+ 1 ['{"name":"guava"}' '{"name":"grapes"}']
138+ dtype: list<item: string>[pyarrow]
139+
140+ >>> s = bpd.Series([
141+ ... '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
142+ ... '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
143+ ... ])
144+ >>> bbq.json_extract_array(s, "$.fruits.names")
145+ 0 ['"apple"' '"cherry"']
146+ 1 ['"guava"' '"grapes"']
147+ dtype: list<item: string>[pyarrow]
148+
127149 Args:
128- series (bigframes.series.Series):
150+ input (bigframes.series.Series):
129151 The Series containing JSON data (as native JSON objects or JSON-formatted strings).
130152 json_path (str):
131153 The JSON path identifying the data that you want to obtain from the input.
132154
133155 Returns:
134- bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING .
156+ bigframes.series.Series: A new Series with the parsed arrays from the input .
135157 """
136- return series ._apply_unary_op (ops .JSONExtractArray (json_path = json_path ))
158+ return input ._apply_unary_op (ops .JSONExtractArray (json_path = json_path ))
159+
160+
def json_extract_string_array(
    input: series.Series,
    json_path: str = "$",
    value_dtype: Optional[
        Union[bigframes.dtypes.Dtype, bigframes.dtypes.DtypeString]
    ] = None,
) -> series.Series:
    """Extracts a JSON array as a SQL array of `STRING` values, optionally
    coercing the array elements to another data type via `value_dtype`. This
    function uses single quotes and brackets to escape invalid JSONPath
    characters in JSON keys.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]'])
        >>> bbq.json_extract_string_array(s)
        0    ['1' '2' '3']
        1        ['4' '5']
        dtype: list<item: string>[pyarrow]

        >>> bbq.json_extract_string_array(s, value_dtype='Int64')
        0    [1 2 3]
        1      [4 5]
        dtype: list<item: int64>[pyarrow]

        >>> s = bpd.Series([
        ...   '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
        ...   '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
        ... ])
        >>> bbq.json_extract_string_array(s, "$.fruits.names")
        0    ['apple' 'cherry']
        1    ['guava' 'grapes']
        dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series containing JSON data (as native JSON objects or
            JSON-formatted strings).
        json_path (str):
            The JSON path identifying the array to extract from the input.
            Defaults to ``"$"``.
        value_dtype (dtype, Optional):
            Data type to coerce the array elements to. When it is ``None``
            or the string dtype, the extracted string arrays are returned
            as they are. For the boolean dtype, each element is lowercased
            and compared with ``"true"``; for any other dtype the elements
            are converted with ``astype``.

    Returns:
        bigframes.series.Series: A new Series with the parsed arrays from the input.
    """
    string_arrays = input._apply_unary_op(
        ops.JSONExtractStringArray(json_path=json_path)
    )

    # Nothing to coerce: the extraction already yields arrays of strings.
    if value_dtype in [None, bigframes.dtypes.STRING_DTYPE]:
        return string_arrays

    # Flatten to one element per row so the conversion is a plain
    # element-wise operation.
    elements = string_arrays.explode()
    if value_dtype == bigframes.dtypes.BOOL_DTYPE:
        # Booleans are derived by text comparison instead of astype().
        elements = elements.str.lower() == "true"
    else:
        elements = elements.astype(value_dtype)

    # Gather the converted elements back into one array per original row,
    # grouping on the input's index levels (null keys are kept).
    regrouped = elements.groupby(level=input.index.names, dropna=False)
    return cast(series.Series, array.array_agg(regrouped))
0 commit comments