File tree Expand file tree Collapse file tree 3 files changed +13
-6
lines changed Expand file tree Collapse file tree 3 files changed +13
-6
lines changed Original file line number Diff line number Diff line change 116116 "UTF-32LE" ,
117117}
118118
119- # BigQuery has 1 MB query size limit, 5000 items shouldn't take more than 10% of this depending on data type.
120- # TODO(tbergeron): Convert to bytes-based limit
121- MAX_INLINE_DF_SIZE = 5000
119+ # BigQuery has 1 MB query size limit. Don't want to take up more than a few % of that inlining a table.
120+ # Also must assume that text encoding as literals is much less efficient than in-memory representation.
121+ MAX_INLINE_DF_BYTES = 5000
122122
123123logger = logging .getLogger (__name__ )
124124
@@ -1051,7 +1051,7 @@ def _read_pandas_inline(
10511051 ) -> Optional [dataframe .DataFrame ]:
10521052 import bigframes .dataframe as dataframe
10531053
1054- if pandas_dataframe .size > MAX_INLINE_DF_SIZE :
1054+ if pandas_dataframe .memory_usage ( deep = True ). sum () > MAX_INLINE_DF_BYTES :
10551055 return None
10561056
10571057 try :
Original file line number Diff line number Diff line change @@ -66,6 +66,13 @@ def test_df_construct_pandas_default(scalars_dfs):
6666 pandas .testing .assert_frame_equal (bf_result , pd_result )
6767
6868
69+ def test_df_construct_large_strings ():
70+ data = [["hello" , "w" + "o" * 50000 + "rld" ]]
71+ bf_result = dataframe .DataFrame (data ).to_pandas ()
72+ pd_result = pd .DataFrame (data , dtype = pd .StringDtype (storage = "pyarrow" ))
73+ pandas .testing .assert_frame_equal (bf_result , pd_result , check_index_type = False )
74+
75+
6976def test_df_construct_pandas_load_job (scalars_dfs ):
7077 # This should trigger the inlined codepath
7178 columns = [
Original file line number Diff line number Diff line change 2020
2121import bigframes as bf
2222import bigframes .formatting_helpers as formatting_helpers
23- from bigframes .session import MAX_INLINE_DF_SIZE
23+ from bigframes .session import MAX_INLINE_DF_BYTES
2424
2525job_load_message_regex = r"\w+ job [\w-]+ is \w+\."
2626
@@ -70,7 +70,7 @@ def test_progress_bar_load_jobs(
7070):
7171 # repeat the DF to be big enough to trigger the load job.
7272 df = penguins_pandas_df_default_index
73- while len (df ) < MAX_INLINE_DF_SIZE :
73+ while len (df ) < MAX_INLINE_DF_BYTES :
7474 df = pd .DataFrame (np .repeat (df .values , 2 , axis = 0 ))
7575
7676 bf .options .display .progress_bar = "terminal"
You can’t perform that action at this time.
0 commit comments