@@ -17,6 +17,21 @@ class OneHotEncoder(BaseEstimator):
1717 Note that this method deviates from Scikit-Learn; instead of producing sparse
1818 binary columns, the encoding is a single column of `STRUCT<index INT64, value DOUBLE>`.
1919
20+ **Examples:**
21+
22+ Given a dataset with two features, we let the encoder find the unique
23+ values per feature and transform the data to a binary one-hot encoding.
24+
25+ .. code-block::
26+
27+ from bigframes.ml.preprocessing import OneHotEncoder
28+ import bigframes.pandas as bpd
29+
30+ enc = OneHotEncoder()
31+ X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
32+ enc.fit(X)
33+ print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
34+
2035 Args:
2136 drop (Optional[Literal["most_frequent"]], default None):
2237 Specifies a methodology to use to drop one of the categories per feature.
@@ -37,21 +52,6 @@ class OneHotEncoder(BaseEstimator):
3752 when considering infrequent categories. If there are infrequent categories,
3853 max_categories includes the category representing the infrequent categories along with the frequent categories.
3954 Default None, which sets the limit to 1,000,000.
40-
41- Examples:
42-
43- Given a dataset with two features, we let the encoder find the unique
44- values per feature and transform the data to a binary one-hot encoding.
45-
46- .. code-block::
47-
48- from bigframes.ml.preprocessing import OneHotEncoder
49- import bigframes.pandas as bpd
50-
51- enc = OneHotEncoder()
52- X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
53- enc.fit(X)
54- print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
5555 """
5656
5757 def fit (self , X , y = None ):
0 commit comments