Skip to content

Commit 562cf8d

Browse files
committed
address additional comments
1 parent 58ded5d commit 562cf8d

File tree

4 files changed

+26
-36
lines changed

4 files changed

+26
-36
lines changed

bigframes/ml/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def evaluate(self, input_data: Optional[bpd.DataFrame] = None):
189189

190190
def llm_evaluate(
191191
self,
192-
input_data: Optional[bpd.DataFrame] = None,
192+
input_data: bpd.DataFrame,
193193
task_type: Optional[str] = None,
194194
):
195195
sql = self._model_manipulation_sql_generator.ml_llm_evaluate(

bigframes/ml/llm.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def predict(
220220
221221
Args:
222222
X (bigframes.dataframe.DataFrame or bigframes.series.Series):
223-
Input DataFrame or Series, which needs to contain a column with name "prompt". Only the column will be used as input.
223+
Input DataFrame or Series, which contains only one column of prompts.
224224
Prompts can include preamble, questions, suggestions, instructions, or examples.
225225
226226
temperature (float, default 0.0):
@@ -335,11 +335,11 @@ def score(
335335
336336
Args:
337337
X (bigframes.dataframe.DataFrame or bigframes.series.Series):
338-
A BigQuery DataFrame as evaluation data. X must have a column named
339-
``input_text`` that contains the prompt text to use when evaluating the model.
338+
A BigQuery DataFrame as evaluation data. X must contain exactly one column,
339+
which holds the prompt text (input text) to use when evaluating the model.
340340
y (bigframes.dataframe.DataFrame or bigframes.series.Series):
341-
A BigQuery DataFrame as evaluation labels. y must also have a column named ``output_text`` that contains the generated
342-
text that you would expect to be returned by the model.
341+
A BigQuery DataFrame as evaluation labels. y must contain exactly one column,
342+
which holds the generated text (output text) that you would expect to be returned by the model.
343343
task_type (Optional[str]):
344344
The type of the task for LLM model. Default to "text_generation".
345345
Possible values: "text_generation", "classification", "summarization", and "question_answering".
@@ -352,25 +352,20 @@ def score(
352352

353353
X, y = utils.convert_to_dataframe(X, y)
354354

355-
X_columns = X.columns.to_list()
356-
y_columns = y.columns.to_list()
357-
if "input_text" not in X_columns:
358-
raise ValueError(
359-
"""Must contain a column named input_text that contains the prompt
360-
text to use when evaluating the model."""
361-
)
362-
if "output_text" not in y_columns:
355+
if len(X.columns) != 1 or len(y.columns) != 1:
363356
raise ValueError(
364-
"""Must contain a column named output_text that contains the generated
365-
text that you would expect to be returned by the model."""
357+
f"Only support one column as input for X and y. {constants.FEEDBACK_LINK}"
366358
)
367359

368-
input_data = (
369-
X.join(y, how="outer") if (X is not None) and (y is not None) else None
370-
)
371-
refined_data = input_data[["input_text", "output_text"]].copy()
360+
# BQML identifies the columns by name, so rename them to the expected labels
361+
X_col_label = cast(blocks.Label, X.columns[0])
362+
y_col_label = cast(blocks.Label, y.columns[0])
363+
X = X.rename(columns={X_col_label: "input_text"})
364+
y = y.rename(columns={y_col_label: "output_text"})
365+
366+
input_data = X.join(y, how="outer")
372367

373-
return self._bqml_model.llm_evaluate(refined_data, task_type)
368+
return self._bqml_model.llm_evaluate(input_data, task_type)
374369

375370
def to_gbq(self, model_name: str, replace: bool = False) -> PaLM2TextGenerator:
376371
"""Save the model to BigQuery.

bigframes/ml/sql.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -320,15 +320,11 @@ def ml_evaluate(self, source_df: Optional[bpd.DataFrame] = None) -> str:
320320

321321
# ML evaluation TVFs
322322
def ml_llm_evaluate(
323-
self, source_df: Optional[bpd.DataFrame] = None, task_type: Optional[str] = None
323+
self, source_df: bpd.DataFrame, task_type: Optional[str] = None
324324
) -> str:
325325
"""Encode ML.EVALUATE for BQML"""
326-
if source_df is None:
327-
source_sql = None
328-
else:
329-
# Note: don't need index as evaluate returns a new table
330-
source_sql, _, _ = source_df._to_sql_query(include_index=False)
331-
326+
# Note: don't need index as evaluate returns a new table
327+
source_sql, _, _ = source_df._to_sql_query(include_index=False)
332328
return f"""SELECT * FROM ML.EVALUATE(MODEL `{self._model_name}`,
333329
({source_sql}), STRUCT("{task_type}" AS task_type))"""
334330

tests/system/load/test_llm.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,11 @@ def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_
7272

7373
def test_llm_palm_score(llm_fine_tune_df_default_index):
7474
model = bigframes.ml.llm.PaLM2TextGenerator(model_name="text-bison")
75-
eval_df = llm_fine_tune_df_default_index.rename(
76-
columns={"prompt": "input_text", "label": "output_text"}
77-
)
75+
7876
# Check score to ensure the model was fitted
7977
score_result = model.score(
80-
X=eval_df[["input_text"]], y=eval_df[["output_text"]]
78+
X=llm_fine_tune_df_default_index[["prompt"]],
79+
y=llm_fine_tune_df_default_index[["label"]],
8180
).to_pandas()
8281
score_result_col = score_result.columns.to_list()
8382
expected_col = [
@@ -94,12 +93,12 @@ def test_llm_palm_score_params(llm_fine_tune_df_default_index):
9493
model = bigframes.ml.llm.PaLM2TextGenerator(
9594
model_name="text-bison", max_iterations=1
9695
)
97-
eval_df = llm_fine_tune_df_default_index.rename(
98-
columns={"prompt": "input_text", "label": "output_text"}
99-
)
96+
10097
# Check score to ensure the model was fitted
10198
score_result = model.score(
102-
X=eval_df["input_text"], y=eval_df["output_text"], task_type="classification"
99+
X=llm_fine_tune_df_default_index["prompt"],
100+
y=llm_fine_tune_df_default_index["label"],
101+
task_type="classification",
103102
).to_pandas()
104103
score_result_col = score_result.columns.to_list()
105104
expected_col = [

0 commit comments

Comments
 (0)