imohitmayank
diff --git a/README.md b/README.md
Lines changed: 5 additions & 3 deletions
diff --git a/machine-learning/sklearn_hyperparameter_tune_svr.py b/machine-learning/sklearn_hyperparameter_tune_svr.py
Lines changed: 44 additions & 0 deletions
@@ -7,8 +7,8 @@ List of snippets:
 - `keras_biLSTM.py`: Keras-based example for biLSTM model from data preparation to model initialization to final training.
 - `keras/callback.py`: different callbacks in Keras and how to use them.
 
-- **GIT** (`git/`)
-- `git_config.txt`: Basic GIT operations to add your username and email id in git config.
+- **Machine Learning** (`machine-learning/`)
+- `sklearn_hyperparameter_tune_svr.py`: Hyperparameter tuning for SVR regression algorithm from sklearn
 
 - **Natural Language Processing** (`natural_language_processing/`)
 - `finetune_roberta_huggingface.py`: fine tune RoBERTa model on text.
@@ -24,4 +24,6 @@ List of snippets:
 - `install_package.py`: how to install python packages from python code
 - `python_packaging.sh`: script to clean, build and publish your python package.
 - `virtual_environment.sh`: how to create and activate a virtual python environment (linux and windows)
-
+
+- **GIT** (`git/`)
+- `git_config.txt`: Basic GIT operations to add your username and email id in git config.
@@ -0,0 +1,44 @@
+"""Hyperparameter tuning for SVR regression algorithm
+
+- Specify the search space i.e. the list of algorithm parameters to try
+- For each parameter combination perform a 5 fold CV test scored by MSE
+- Refit SVR on the full training data with the lowest-MSE combination
+"""
+
+# import =======
+import pandas as pd
+from sklearn.metrics import make_scorer, mean_squared_error
+from sklearn.model_selection import GridSearchCV
+from sklearn.svm import SVR
+
+# DATA LOAD ============
+train_data = ... # load the features and target on which to train
+
+# SEARCH SPACE ============
+search_space = [{'kernel': ['poly', 'rbf', 'sigmoid'],
+                 'C': [1, 10, 100], 'epsilon': [10, 1, 0.1, 0.2, 0.01]}]
+
+# TUNING (scorer returns -MSE so that GridSearchCV can maximise it) ============
+scorer = make_scorer(mean_squared_error, greater_is_better=False)
+svr_gs = GridSearchCV(SVR(), search_space, cv=5, scoring=scorer, verbose=10, n_jobs=None)
+svr_gs.fit(train_data['features'], train_data['target'])
+
+# PRINT RESULT ============
+parameter_result = []
+print("Grid scores on training set:")
+means = svr_gs.cv_results_['mean_test_score']
+stds = svr_gs.cv_results_['std_test_score']
+for mean, std, params in zip(means, stds, svr_gs.cv_results_['params']):
+    print("%0.3f (+/-%0.03f) for %r"% (mean, std * 2, params))
+    parameter_result.append({'mean': abs(mean), 'std': std, **params})
+
+# SELECT BEST PARAMETERS ============
+# abs() above turned the negated scores back into MSE, so smallest 'mean' wins
+parameter_result = pd.DataFrame(parameter_result)
+parameter_result = parameter_result.sort_values(by=['mean'])
+best_settings = parameter_result.head(1).to_dict(orient='records')[0]
+
+# FIT WITH BEST PARAMETERS ============
+SVRModel = SVR(C=best_settings['C'], epsilon=best_settings['epsilon'],
+               kernel=best_settings['kernel'])
+SVRModel.fit(train_data['features'], train_data['target'])