@@ -118,7 +118,6 @@ def test_json_set_w_invalid_series_type():
118118def test_json_extract_from_json ():
119119 s = _get_series_from_json ([{"a" : {"b" : [1 , 2 ]}}, {"a" : {"c" : 1 }}, {"a" : {"b" : 0 }}])
120120 actual = bbq .json_extract (s , "$.a.b" ).to_pandas ()
121- # After the introduction of the JSON type, the output should be a JSON-formatted series.
122121 expected = _get_series_from_json ([[1 , 2 ], None , 0 ]).to_pandas ()
123122 pd .testing .assert_series_equal (
124123 actual ,
@@ -129,12 +128,10 @@ def test_json_extract_from_json():
129128def test_json_extract_from_string ():
130129 s = bpd .Series (['{"a": {"b": [1, 2]}}' , '{"a": {"c": 1}}' , '{"a": {"b": 0}}' ])
131130 actual = bbq .json_extract (s , "$.a.b" )
132- expected = _get_series_from_json ([ [1 , 2 ] , None , 0 ])
131+ expected = bpd . Series ([ " [1,2]" , None , "0" ])
133132 pd .testing .assert_series_equal (
134133 actual .to_pandas (),
135134 expected .to_pandas (),
136- check_names = False ,
137- check_dtype = False , # json_extract returns string type. While _get_series_from_json gives a JSON series (pa.large_string).
138135 )
139136
140137
@@ -143,29 +140,68 @@ def test_json_extract_w_invalid_series_type():
143140 bbq .json_extract (bpd .Series ([1 , 2 ]), "$.a" )
144141
145142
143+ def test_json_extract_array_from_json ():
144+ s = _get_series_from_json (
145+ [{"a" : ["ab" , "2" , "3 xy" ]}, {"a" : []}, {"a" : ["4" , "5" ]}, {}]
146+ )
147+ actual = bbq .json_extract_array (s , "$.a" )
148+
149+ # This is a workaround for https://github.com/apache/arrow/issues/45262, which
150+ # currently prevents constructing a series using the pa.list_(db_dtypes.JSONArrowType()) type.
151+ sql = """
152+ SELECT 0 AS id, [JSON '"ab"', JSON '"2"', JSON '"3 xy"'] AS data,
153+ UNION ALL
154+ SELECT 1, [],
155+ UNION ALL
156+ SELECT 2, [JSON '"4"', JSON '"5"'],
157+ UNION ALL
158+ SELECT 3, null,
159+ """
160+ df = bpd .read_gbq (sql ).set_index ("id" ).sort_index ()
161+ expected = df ["data" ]
162+
163+ pd .testing .assert_series_equal (
164+ actual .to_pandas (),
165+ expected .to_pandas (),
166+ )
167+
168+
146169def test_json_extract_array_from_json_strings ():
147- s = bpd .Series (['{"a": ["ab", "2", "3 xy"]}' , '{"a": []}' , '{"a": ["4","5"]}' ])
170+ s = bpd .Series (
171+ ['{"a": ["ab", "2", "3 xy"]}' , '{"a": []}' , '{"a": ["4","5"]}' , "{}" ],
172+ dtype = pd .StringDtype (storage = "pyarrow" ),
173+ )
148174 actual = bbq .json_extract_array (s , "$.a" )
149- expected = bpd .Series ([['"ab"' , '"2"' , '"3 xy"' ], [], ['"4"' , '"5"' ]])
175+ expected = bpd .Series (
176+ [['"ab"' , '"2"' , '"3 xy"' ], [], ['"4"' , '"5"' ], None ],
177+ dtype = pd .StringDtype (storage = "pyarrow" ),
178+ )
150179 pd .testing .assert_series_equal (
151180 actual .to_pandas (),
152181 expected .to_pandas (),
153182 )
154183
155184
156- def test_json_extract_array_from_array_strings ():
157- s = bpd .Series (["[1, 2, 3]" , "[]" , "[4,5]" ])
185+ def test_json_extract_array_from_json_array_strings ():
186+ s = bpd .Series (
187+ ["[1, 2, 3]" , "[]" , "[4,5]" ],
188+ dtype = pd .StringDtype (storage = "pyarrow" ),
189+ )
158190 actual = bbq .json_extract_array (s )
159- expected = bpd .Series ([["1" , "2" , "3" ], [], ["4" , "5" ]])
191+ expected = bpd .Series (
192+ [["1" , "2" , "3" ], [], ["4" , "5" ]],
193+ dtype = pd .StringDtype (storage = "pyarrow" ),
194+ )
160195 pd .testing .assert_series_equal (
161196 actual .to_pandas (),
162197 expected .to_pandas (),
163198 )
164199
165200
166201def test_json_extract_array_w_invalid_series_type ():
202+ s = bpd .Series ([1 , 2 ])
167203 with pytest .raises (TypeError ):
168- bbq .json_extract_array (bpd . Series ([ 1 , 2 ]) )
204+ bbq .json_extract_array (s )
169205
170206
171207def test_json_extract_string_array_from_json_strings ():
@@ -203,14 +239,6 @@ def test_json_extract_string_array_w_invalid_series_type():
203239 bbq .json_extract_string_array (bpd .Series ([1 , 2 ]))
204240
205241
206- # b/381148539
207- def test_json_in_struct ():
208- df = bpd .read_gbq (
209- "SELECT STRUCT(JSON '{\\ \" a\\ \" : 1}' AS data, 1 AS number) as struct_col"
210- )
211- assert df ["struct_col" ].struct .field ("data" )[0 ] == '{"a":1}'
212-
213-
214242def test_parse_json_w_invalid_series_type ():
215243 with pytest .raises (TypeError ):
216244 bbq .parse_json (bpd .Series ([1 , 2 ]))
0 commit comments