googleapis
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
Lines changed: 4 additions & 4 deletions
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
Lines changed: 7 additions & 0 deletions
diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py
Lines changed: 2 additions & 2 deletions
@@ -116,9 +116,9 @@
  "UTF-32LE",
 }
 
-# BigQuery has 1 MB query size limit, 5000 items shouldn't take more than 10% of this depending on data type.
-# TODO(tbergeron): Convert to bytes-based limit
-MAX_INLINE_DF_SIZE = 5000
+# BigQuery has a 1 MB query size limit. Inlining a table should not take up more than a few percent of that.
+# We must also assume that encoding data as text literals is much less efficient than its in-memory representation.
+MAX_INLINE_DF_BYTES = 5000
 
 logger = logging.getLogger(__name__)
 
@@ -1051,7 +1051,7 @@ def _read_pandas_inline(
  ) -> Optional[dataframe.DataFrame]:
  import bigframes.dataframe as dataframe
 
- if pandas_dataframe.size > MAX_INLINE_DF_SIZE:
+ if pandas_dataframe.memory_usage(deep=True).sum() > MAX_INLINE_DF_BYTES:
  return None
 
  try:
 
@@ -66,6 +66,13 @@ def test_df_construct_pandas_default(scalars_dfs):
  pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_df_construct_large_strings():
+ data = [["hello", "w" + "o" * 50000 + "rld"]]
+ bf_result = dataframe.DataFrame(data).to_pandas()
+ pd_result = pd.DataFrame(data, dtype=pd.StringDtype(storage="pyarrow"))
+ pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
 def test_df_construct_pandas_load_job(scalars_dfs):
  # This should trigger the inlined codepath
  columns = [
 
@@ -20,7 +20,7 @@
 
 import bigframes as bf
 import bigframes.formatting_helpers as formatting_helpers
-from bigframes.session import MAX_INLINE_DF_SIZE
+from bigframes.session import MAX_INLINE_DF_BYTES
 
 job_load_message_regex = r"\w+ job [\w-]+ is \w+\."
 
@@ -70,7 +70,7 @@ def test_progress_bar_load_jobs(
 ):
  # repeat the DF to be big enough to trigger the load job.
  df = penguins_pandas_df_default_index
- while len(df) < MAX_INLINE_DF_SIZE:
+ while len(df) < MAX_INLINE_DF_BYTES:
  df = pd.DataFrame(np.repeat(df.values, 2, axis=0))
 
  bf.options.display.progress_bar = "terminal"