Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def test_multiple_timeseries_forecasting_model(random_model_id):
    """Run the ARIMA "multiple time series" tutorial snippets end to end.

    The body is split into region-tagged snippets (``[START ...]`` /
    ``[END ...]``). Each snippet repeats its own imports and data loading
    so that it can be read on its own.

    Args:
        random_model_id: ID under which the fitted model is saved with
            ``model.to_gbq``. Presumably supplied by a pytest fixture --
            confirm against the test configuration.
    """
    # The snippets refer to the model ID by the name a reader would use.
    your_model_id = random_model_id

    # [START bigquery_dataframes_bqml_arima_multiple_step_2_visualize]

    import bigframes.pandas as bpd

    df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips")

    features = bpd.DataFrame(
        {
            "num_trips": df.starttime,
            "date": df["starttime"].dt.date,
        }
    )
    # Count the trips that started on each date; "date" becomes the index.
    num_trips = features.groupby(["date"]).count()

    # Results from running "print(num_trips)"

    #             num_trips
    # date
    # 2013-07-01      16650
    # 2013-07-02      22745
    # 2013-07-03      21864
    # 2013-07-04      22326
    # 2013-07-05      21842
    # 2013-07-06      20467
    # 2013-07-07      20477
    # 2013-07-08      21615
    # 2013-07-09      26641
    # 2013-07-10      25732
    # 2013-07-11      24417
    # 2013-07-12      19006
    # 2013-07-13      26119
    # 2013-07-14      29287
    # 2013-07-15      28069
    # 2013-07-16      29842
    # 2013-07-17      30550
    # 2013-07-18      28869
    # 2013-07-19      26591
    # 2013-07-20      25278
    # 2013-07-21      30297
    # 2013-07-22      25979
    # 2013-07-23      32376
    # 2013-07-24      35271
    # 2013-07-25      31084

    num_trips.plot.line(
        # Rotate the x labels so they are more visible.
        rot=45,
    )

    # [END bigquery_dataframes_bqml_arima_multiple_step_2_visualize]

    # [START bigquery_dataframes_bqml_arima_multiple_step_3_fit]
    from bigframes.ml import forecasting
    import bigframes.pandas as bpd

    df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips")

    features = bpd.DataFrame(
        {
            "num_trips": df.starttime,
            "date": df["starttime"].dt.date,
        }
    )
    # as_index=False keeps "date" as a regular column so that it can be
    # selected as the timestamp input below.
    num_trips = features.groupby(["date"], as_index=False).count()
    model = forecasting.ARIMAPlus()

    # Keep .to_frame(): ARIMAPlus._fit checks X.columns.size, and passing a
    # Series instead fails with "AttributeError: columns".
    X = num_trips["date"].to_frame()
    y = num_trips["num_trips"].to_frame()

    model.fit(X, y)
    # The model.fit() call above created a temporary model.
    # Use the to_gbq() method to write to a permanent location.

    model.to_gbq(
        your_model_id,  # For example: "bqml_tutorial.nyc_citibike_arima_model",
        replace=True,
    )
    # [END bigquery_dataframes_bqml_arima_multiple_step_3_fit]