googleapis
diff --git a/‎bigframes/ml/core.py‎
Lines changed: 1 addition & 1 deletion b/‎bigframes/ml/core.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bigframes/ml/llm.py‎
Lines changed: 15 additions & 20 deletions b/‎bigframes/ml/llm.py‎
Lines changed: 15 additions & 20 deletions
diff --git a/‎bigframes/ml/sql.py‎
Lines changed: 3 additions & 7 deletions b/‎bigframes/ml/sql.py‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎tests/system/load/test_llm.py‎
Lines changed: 7 additions & 8 deletions b/‎tests/system/load/test_llm.py‎
Lines changed: 7 additions & 8 deletions
@@ -189,7 +189,7 @@ def evaluate(self, input_data: Optional[bpd.DataFrame] = None):
 
  def llm_evaluate(
  self,
- input_data: Optional[bpd.DataFrame] = None,
+ input_data: bpd.DataFrame,
  task_type: Optional[str] = None,
  ):
  sql = self._model_manipulation_sql_generator.ml_llm_evaluate(
 
@@ -220,7 +220,7 @@ def predict(
 
  Args:
  X (bigframes.dataframe.DataFrame or bigframes.series.Series):
- Input DataFrame or Series, which needs to contain a column with name "prompt". Only the column will be used as input.
+ Input DataFrame or Series containing exactly one column of prompts.
  Prompts can include preamble, questions, suggestions, instructions, or examples.
 
  temperature (float, default 0.0):
@@ -335,11 +335,11 @@ def score(
 
  Args:
  X (bigframes.dataframe.DataFrame or bigframes.series.Series):
- A BigQuery DataFrame as evaluation data. X must have a column named
- ``input_text`` that contains the prompt text to use when evaluating the model.
+ A BigQuery DataFrame or Series as evaluation data. It must contain exactly one column,
+ which holds the prompt text (input_text) to use when evaluating the model.
  y (bigframes.dataframe.DataFrame or bigframes.series.Series):
- A BigQuery DataFrame as evaluation labels. y must also have a column named ``output_text`` that contains the generated
- text that you would expect to be returned by the model.
+ A BigQuery DataFrame or Series as evaluation labels. It must contain exactly one column,
+ which holds the text (output_text) that you would expect to be returned by the model.
  task_type (Optional[str]):
  The type of the task for LLM model. Default to "text_generation".
  Possible values: "text_generation", "classification", "summarization", and "question_answering".
@@ -352,25 +352,20 @@ def score(
 
  X, y = utils.convert_to_dataframe(X, y)
 
- X_columns = X.columns.to_list()
- y_columns = y.columns.to_list()
- if "input_text" not in X_columns:
- raise ValueError(
- """Must contain a column named input_text that contains the prompt
- text to use when evaluating the model."""
- )
- if "output_text" not in y_columns:
+ if len(X.columns) != 1 or len(y.columns) != 1:
  raise ValueError(
- """Must contain a column named output_text that contains the generated
- text that you would expect to be returned by the model."""
+ f"Only support one column as input for X and y. {constants.FEEDBACK_LINK}"
  )
 
- input_data = (
- X.join(y, how="outer") if (X is not None) and (y is not None) else None
- )
- refined_data = input_data[["input_text", "output_text"]].copy()
+ # BQML identifies the columns by name, so rename them to the labels it expects
+ X_col_label = cast(blocks.Label, X.columns[0])
+ y_col_label = cast(blocks.Label, y.columns[0])
+ X = X.rename(columns={X_col_label: "input_text"})
+ y = y.rename(columns={y_col_label: "output_text"})
+
+ input_data = X.join(y, how="outer")
 
- return self._bqml_model.llm_evaluate(refined_data, task_type)
+ return self._bqml_model.llm_evaluate(input_data, task_type)
 
  def to_gbq(self, model_name: str, replace: bool = False) -> PaLM2TextGenerator:
  """Save the model to BigQuery.
 
@@ -320,15 +320,11 @@ def ml_evaluate(self, source_df: Optional[bpd.DataFrame] = None) -> str:
 
  # ML evaluation TVFs
  def ml_llm_evaluate(
- self, source_df: Optional[bpd.DataFrame] = None, task_type: Optional[str] = None
+ self, source_df: bpd.DataFrame, task_type: Optional[str] = None
  ) -> str:
  """Encode ML.EVALUATE for BQML"""
- if source_df is None:
- source_sql = None
- else:
- # Note: don't need index as evaluate returns a new table
- source_sql, _, _ = source_df._to_sql_query(include_index=False)
-
+ # Note: the index is not needed because ML.EVALUATE returns a new table
+ source_sql, _, _ = source_df._to_sql_query(include_index=False)
  return f"""SELECT * FROM ML.EVALUATE(MODEL `{self._model_name}`,
  ({source_sql}), STRUCT("{task_type}" AS task_type))"""
 
 
@@ -72,12 +72,11 @@ def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_
 
 def test_llm_palm_score(llm_fine_tune_df_default_index):
  model = bigframes.ml.llm.PaLM2TextGenerator(model_name="text-bison")
- eval_df = llm_fine_tune_df_default_index.rename(
- columns={"prompt": "input_text", "label": "output_text"}
- )
+
  # Check score to ensure the model was fitted
  score_result = model.score(
- X=eval_df[["input_text"]], y=eval_df[["output_text"]]
+ X=llm_fine_tune_df_default_index[["prompt"]],
+ y=llm_fine_tune_df_default_index[["label"]],
  ).to_pandas()
  score_result_col = score_result.columns.to_list()
  expected_col = [
@@ -94,12 +93,12 @@ def test_llm_palm_score_params(llm_fine_tune_df_default_index):
  model = bigframes.ml.llm.PaLM2TextGenerator(
  model_name="text-bison", max_iterations=1
  )
- eval_df = llm_fine_tune_df_default_index.rename(
- columns={"prompt": "input_text", "label": "output_text"}
- )
+
  # Check score to ensure the model was fitted
  score_result = model.score(
- X=eval_df["input_text"], y=eval_df["output_text"], task_type="classification"
+ X=llm_fine_tune_df_default_index["prompt"],
+ y=llm_fine_tune_df_default_index["label"],
+ task_type="classification",
  ).to_pandas()
  score_result_col = score_result.columns.to_list()
  expected_col = [