I'm currently using the following code as a starting point to deepen my understanding of regularized logistic regression. As a first pass I'm just trying to do a binary classification on part of the iris data set.
One problem I think I have run into is that the negative log-likelihood (computed with loss and stored in loss_vec) barely changes from one iteration to the next; I sketch how I have been tracking it after the code below.
Another challenge I am facing is figuring out how to plot the decision boundary once I have learned the logistic regression coefficients. When I use the learned coefficients to draw the 0.5 decision boundary, the line ends up way off, which makes me think I have made a mistake somewhere. Roughly how I am trying to draw the boundary is sketched just below.
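As far as I understand it, the predicted probability is 0.5 exactly on the line w1*x1 + w2*x2 + c = 0, so I solve that line for the second feature. The helper name and which row of bgd's output (from the main script further down) I pass in are just placeholders of mine:

import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(X, Y, x_opt):
    # x_opt is assumed to hold the two feature weights followed by the bias term
    w, c = x_opt[:2], x_opt[-1]
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 200)
    # phi(w.x + c) = 0.5 exactly where w[0]*x1 + w[1]*x2 + c = 0
    x2 = -(w[0] * x1 + c) / w[1]
    plt.scatter(X[:, 0], X[:, 1], c=Y)   # the two iris features, colored by class
    plt.plot(x1, x2, 'k-')               # the 0.5 decision boundary
    plt.show()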
The main script below is adapted from this post: http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets

iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target
X = X[:100, :]        # keep the first 100 rows, i.e. the first two classes
Y = Y[:100]


def phi(t):
    # logistic function, returns 1 / (1 + exp(-t)), computed in a numerically stable way
    idx = t > 0
    out = np.empty(t.size, dtype=float)
    out[idx] = 1. / (1 + np.exp(-t[idx]))
    exp_t = np.exp(t[~idx])
    out[~idx] = exp_t / (1. + exp_t)
    return out


def loss(x0, X, y, alpha):
    # regularized logistic loss: mean of -log(phi(y * (X w + c))) plus an L2 penalty on w
    # x0 packs the weight vector w in its first entries and the bias term c in its last
    w, c = x0[:X.shape[1]], x0[-1]
    z = X.dot(w) + c
    yz = y * z
    idx = yz > 0
    out = np.zeros_like(yz)
    out[idx] = np.log(1 + np.exp(-yz[idx]))
    out[~idx] = -yz[~idx] + np.log(1 + np.exp(yz[~idx]))
    out = out.sum() / X.shape[0] + .5 * alpha * w.dot(w)
    return out


def gradient(x0, X, y, alpha):
    # gradient of the regularized logistic loss with respect to (w, c)
    w, c = x0[:X.shape[1]], x0[-1]
    z = X.dot(w) + c
    z = phi(y * z)
    z0 = (z - 1) * y
    grad_w = X.T.dot(z0) / X.shape[0] + alpha * w
    grad_c = z0.sum() / X.shape[0]
    return np.concatenate((grad_w, [grad_c]))


def bgd(X, y, alpha, max_iter):
    # batch gradient descent with a crude line search over a fixed menu of step sizes
    step_sizes = np.array([100, 10, 1, .1, .01, .001, .0001, .00001])
    iter_no = 0
    x0 = np.random.random(X.shape[1] + 1)  # initialize weight vector and bias
    # placeholder for the candidate coefficients to test against the loss function
    next_thetas = np.zeros((step_sizes.shape[0], X.shape[1] + 1))
    J = loss(x0, X, y, alpha)  # initial loss
    running_loss = []
    while iter_no < max_iter:
        grad = gradient(x0, X, y, alpha)
        # one candidate update x0 - step * grad per step size
        next_thetas = -(np.outer(step_sizes, grad) - x0)
        loss_vec = []
        for i in range(np.shape(next_thetas)[0]):
            loss_vec.append(loss(next_thetas[i], X, y, alpha))
        ind = np.argmin(loss_vec)
        x0 = next_thetas[ind]  # keep the candidate with the smallest loss
        if iter_no % 500 == 0:
            running_loss.append(loss(x0, X, y, alpha))
        iter_no += 1
    return next_thetas
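To see the first problem more concretely, here is roughly how I have been watching the loss from outside bgd. The fixed 0.1 step, the label remapping, and the C value in the scikit-learn comparison are all my own guesses rather than part of the script above: the y * z form of the loss looks to me like it expects labels in {-1, +1}, and C = 1 / (alpha * n) is my attempt to make LogisticRegression's penalty line up with mean log-loss + 0.5 * alpha * ||w||^2.

alpha = 0.1
y_pm1 = np.where(Y == 0, -1, 1)   # my assumption: remap {0, 1} labels to {-1, +1}

x_curr = np.random.random(X.shape[1] + 1)
loss_history = []
for it in range(2000):
    x_curr = x_curr - 0.1 * gradient(x_curr, X, y_pm1, alpha)   # plain fixed-step descent
    loss_history.append(loss(x_curr, X, y_pm1, alpha))

plt.plot(loss_history)
plt.xlabel('iteration')
plt.ylabel('regularized mean negative log-likelihood')
plt.show()

# sanity check against scikit-learn's solver (C chosen to roughly match the penalty above)
clf = linear_model.LogisticRegression(C=1.0 / (alpha * X.shape[0]))
clf.fit(X, Y)
print(clf.coef_, clf.intercept_)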