交叉熵损失函数;sigmoid激活函数
基于numpy的逻辑回归的程序如下:
import numpy as np


class logistic_regression():
    """Binary logistic regression trained with full-batch gradient descent.

    Model: sigmoid activation over a linear score, optimized against the
    cross-entropy loss. Parameters are a weight column vector W of shape
    (n_features, 1) and a scalar bias b.
    """

    def __init__(self):
        pass

    def sigmoid(self, x):
        """Element-wise sigmoid 1 / (1 + exp(-x))."""
        z = 1 / (1 + np.exp(-x))
        return z

    def initialize_params(self, dims):
        """Return zero-initialized weights of shape (dims, 1) and bias 0."""
        W = np.zeros((dims, 1))
        b = 0
        return W, b

    def logistic(self, X, y, W, b):
        """One forward/backward pass.

        Parameters
        ----------
        X : array of shape (num_train, num_features)
        y : column vector of 0/1 labels, shape (num_train, 1)
        W, b : current parameters

        Returns (activations, cross-entropy cost, dW, db).
        """
        num_train = X.shape[0]
        a = self.sigmoid(np.dot(X, W) + b)
        cost = -1 / num_train * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))
        dW = np.dot(X.T, (a - y)) / num_train
        db = np.sum(a - y) / num_train
        cost = np.squeeze(cost)  # reduce to a plain scalar, easier to plot
        return a, cost, dW, db

    def logistic_train(self, X, y, learning_rate, epochs):
        """Train by gradient descent for `epochs` iterations.

        Cost is recorded and printed every 100 epochs.
        Returns (cost_list, params, grads) with params = {'W': W, 'b': b}
        and grads holding the last-step gradients.
        """
        W, b = self.initialize_params(X.shape[1])
        cost_list = []
        for i in range(epochs):
            a, cost, dW, db = self.logistic(X, y, W, b)
            W = W - learning_rate * dW
            b = b - learning_rate * db
            if i % 100 == 0:
                cost_list.append(cost)
                print('epoch %d cost %f' % (i, cost))
        params = {'W': W, 'b': b}
        grads = {'dW': dW, 'db': db}
        return cost_list, params, grads

    def predict(self, X, params):
        """Return hard 0/1 predictions for X (probability threshold 0.5)."""
        y_prediction = self.sigmoid(np.dot(X, params['W']) + params['b'])
        for i in range(len(y_prediction)):
            if y_prediction[i] > 0.5:
                y_prediction[i] = 1
            else:
                y_prediction[i] = 0
        return y_prediction

    def accuracy(self, y_test, y_pred):
        """Fraction of positions where y_test and y_pred agree.

        The original O(n^2) double loop only counted pairs with i == j,
        so a single aligned pass gives the identical result in O(n).
        """
        correct_count = 0
        for y_true_i, y_pred_i in zip(y_test, y_pred):
            if y_true_i == y_pred_i:
                correct_count += 1
        accuracy_score = correct_count / len(y_test)
        return accuracy_score

    def create_data(self):
        """Generate a 2-feature binary classification set, 90/10 split.

        Returns (X_train, y_train, X_test, y_test); labels are column vectors.
        """
        # Lazy import: the old `sklearn.datasets.samples_generator` path was
        # removed in scikit-learn 0.24 — the public path is sklearn.datasets.
        # Importing here also keeps the module usable without sklearn installed.
        from sklearn.datasets import make_classification
        X, labels = make_classification(n_samples=100, n_features=2,
                                        n_redundant=0, n_informative=2)
        labels = labels.reshape((-1, 1))
        offset = int(X.shape[0] * 0.9)
        # train / test split
        X_train, y_train = X[:offset], labels[:offset]
        X_test, y_test = X[offset:], labels[offset:]
        return X_train, y_train, X_test, y_test

    def plot_logistic(self, X_train, y_train, params):
        """Scatter both classes and draw the learned decision boundary."""
        import matplotlib.pyplot as plt  # lazy: plotting is optional
        n = X_train.shape[0]
        xcord1 = []
        ycord1 = []
        xcord2 = []
        ycord2 = []
        for i in range(n):
            if y_train[i] == 1:  # class 1
                xcord1.append(X_train[i][0])
                ycord1.append(X_train[i][1])
            else:  # class 0
                xcord2.append(X_train[i][0])
                ycord2.append(X_train[i][1])
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=32, c='red')
        ax.scatter(xcord2, ycord2, s=32, c='green')
        # Decision boundary: W0*x + W1*y + b = 0  =>  y = (-b - W0*x) / W1
        x = np.arange(-1.5, 3, 0.1)
        y = (-params['b'] - params['W'][0] * x) / params['W'][1]
        ax.plot(x, y)
        plt.xlabel('X1')
        plt.ylabel('X2')
        plt.show()


if __name__ == "__main__":
    model = logistic_regression()
    X_train, y_train, X_test, y_test = model.create_data()
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    # expected shapes: (90, 2) (90, 1) (10, 2) (10, 1)
    cost_list, params, grads = model.logistic_train(X_train, y_train, 0.01, 1000)
    print(params)
    y_train_pred = model.predict(X_train, params)
    accuracy_score_train = model.accuracy(y_train, y_train_pred)
    print('train accuracy is:', accuracy_score_train)
    y_test_pred = model.predict(X_test, params)
    accuracy_score_test = model.accuracy(y_test, y_test_pred)
    print('test accuracy is:', accuracy_score_test)
    model.plot_logistic(X_train, y_train, params)
结果如下所示: