"""Hyperparameter tuning for the SVR regression algorithm.

- Specify the search space, i.e. the list of algorithm parameter values to try.
- For each parameter combination, perform a 5-fold cross-validation test.
- Refit an SVR on the training data using the combination with the smallest loss.
"""
6+
7+ # import =======
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
11+
# DATA LOAD ============
# Placeholder: replace the ellipsis with the real training data. The rest of
# the script indexes this object with the keys 'features' and 'target'.
train_data = ...
14+
# SEARCH SPACE ============
# One grid: every combination of kernel, C and epsilon listed here is tried.
search_space = [
    dict(
        kernel=['poly', 'rbf', 'sigmoid'],
        C=[1, 10, 100],
        epsilon=[10, 1, 0.1, 0.2, 0.01],
    ),
]
18+
# TUNING ============
# Candidates are scored with mean squared error. The scorer is built with
# greater_is_better=False because MSE is a loss: the grid search should
# prefer the smallest error, not the largest.
scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Search over every combination in search_space with 5-fold cross-validation.
# SVR is sklearn.svm.SVR. verbose=10 requests progress output while fitting;
# n_jobs=None keeps the library's default parallelism.
svr_gs = GridSearchCV(
    estimator=SVR(),
    param_grid=search_space,
    cv=5,
    scoring=scorer,
    verbose=10,
    n_jobs=None,
)
svr_gs.fit(train_data['features'], train_data['target'])
23+
24+
# PRINT RESULT ============
# Report the cross-validated score of every parameter combination and keep
# one record per combination for the selection step that follows.
parameter_result = []
print("Grid scores on training set:")
means = svr_gs.cv_results_['mean_test_score']
stds = svr_gs.cv_results_['std_test_score']
for mean_score, score_std, candidate in zip(means, stds, svr_gs.cv_results_['params']):
    # The printed spread is twice the standard deviation of the test scores.
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, score_std * 2, candidate))
    # Store the magnitude of the mean score so it can later be sorted as a
    # loss (the scorer was created with greater_is_better=False).
    record = {'mean': abs(mean_score), 'std': score_std}
    record.update(candidate)
    parameter_result.append(record)
33+
# SELECT BEST PARAMETERS ============
# Select the settings with the smallest loss: tabulate the per-combination
# records, order them by ascending 'mean' (the absolute mean CV score), and
# take the first row as a plain dict of column name -> value.
parameter_result = pd.DataFrame(parameter_result).sort_values(by=['mean'])
best_settings = parameter_result.head(1).to_dict(orient='records')[0]
39+
# FIT WITH BEST PARAMETERS ============
# Build a fresh SVR (sklearn.svm.SVR) from the selected C / epsilon / kernel
# and train it on the same training data that was used for the grid search.
SVRModel = SVR(
    C=best_settings['C'],
    epsilon=best_settings['epsilon'],
    kernel=best_settings['kernel'],
)
SVRModel.fit(train_data['features'], train_data['target'])
0 commit comments