Basic usage
```
import numpy as np
import theano.tensor as T
from theano import function

x = T.dscalar('x')  # define a scalar; the leading 'd' means float64
y = T.dscalar('y')
z = x + y
f = function([x, y], z)  # inputs first, then the output
print(f(2, 3))
```
```
# print the symbolic expression behind the function
from theano import pp
print(pp(z))
```
```
# define matrices
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y  # element-wise addition; T.dot(x, y) would be matrix multiplication
f = function([x, y], z)
print(f(np.arange(12).reshape((3, 4)), 10 * np.ones((3, 4))))
```
Using function
```
import numpy as np
import theano.tensor as T
import theano

# activation function example
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))  # the logistic (sigmoid) function, written with Theano ops
logistic = theano.function([x], s)
print(logistic([[0, 1], [-2, -3]]))
```
```
# a function can return several outputs
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
x1, x2, x3 = f(np.ones((2, 2)), np.arange(4).reshape((2, 2)))

# named inputs with default values
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
f = theano.function([x,
                     theano.In(y, value=1),                   # default value for y
                     theano.In(w, value=2, name='weights')],  # default value plus a name for w
                    z)
print(f(23))                # use both defaults
print(f(23, 2))             # override the default of y
print(f(23, 2, weights=4))  # override w by its name
```
Shared variables
```
import numpy as np
import theano
import theano.tensor as T

# define a shared variable; its dtype must stay consistent everywhere it is used
state = theano.shared(np.array(0, dtype=np.float64), 'state')
inc = T.scalar('inc', dtype=state.dtype)
accumulator = theano.function([inc], state, updates=[(state, state + inc)])

# read the shared variable's value
print(state.get_value())  # current value of state
accumulator(1)
print(state.get_value())
accumulator(10)
print(state.get_value())

# set the value directly
state.set_value(-1)
accumulator(3)
print(state.get_value())

# temporarily substitute another value without changing the shared variable
tmp_func = state * 2 + inc
a = T.scalar(dtype=state.dtype)
skip_shared = theano.function([inc, a], tmp_func, givens=[(state, a)])  # use a's value in place of state
print(skip_shared(2, 3))
print(state.get_value())  # state itself is unchanged
```
Defining a Layer class
```
# how the Layer class (defined below) is used
l1 = Layer(inputs, in_size=1, out_size=10, activation_function=T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)
```
```
import theano
import theano.tensor as T
import numpy as np

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size, )) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)
```
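As a quick sanity check (a minimal sketch of my own, not part of the original notes), the class can be wired into a compiled forward pass; the input size of 1, hidden size of 10 and `T.nnet.relu` follow the regression example further down:

```
x = T.dmatrix('x')
l1 = Layer(x, in_size=1, out_size=10, activation_function=T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# compile a forward pass through the two stacked layers
forward = theano.function([x], l2.outputs)
print(forward(np.linspace(-1, 1, 5)[:, np.newaxis]))  # predictions of the untrained network
```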
Regression example
```
from __future__ import print_function
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size, )) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)

# fabricate some data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise  # y = x^2 - 0.5

# show the data
plt.scatter(x_data, y_data)
plt.show()

# define the x, y placeholders
x = T.dmatrix("x")
y = T.dmatrix("y")

# add layers
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# mean squared error cost
cost = T.mean(T.square(l2.outputs - y))

# compute the gradients
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

# apply gradient descent
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])

# prediction function
predict = theano.function(inputs=[x], outputs=l2.outputs)

for i in range(1000):
    # training
    err = train(x_data, y_data)
    if i % 50 == 0:
        print(err)
```
Visualizing the regression result
```
# continue from the predict function above
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()   # interactive mode, so the plot updates in real time
plt.show()

for i in range(1000):
    # training
    err = train(x_data, y_data)
    if i % 50 == 0:
        # watch the fit improve: remove the previous fitted line, if any
        try:
            ax.lines.remove(lines[0])
        except Exception:
            pass
        prediction_value = predict(x_data)
        # plot the current prediction
        lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
        plt.pause(.5)
```
Classification example
```
from __future__ import print_function
import numpy as np
import theano
import theano.tensor as T

def compute_accuracy(y_target, y_predict):
    correct_prediction = np.equal(y_predict, y_target)
    accuracy = np.sum(correct_prediction) / len(correct_prediction)
    return accuracy

rng = np.random

N = 400      # number of training samples
feats = 784  # number of input features

# generate random data: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))

# define the x, y placeholders
x = T.dmatrix("x")
y = T.dvector("y")

# initialise the weights and bias
W = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")

# activation function and cross-entropy
p_1 = T.nnet.sigmoid(T.dot(x, W) + b)               # sigmoid activation
prediction = p_1 > 0.5                              # predict True (class 1) when p_1 > 0.5
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)   # per-sample cross-entropy
cost = xent.mean() + 0.01 * (W ** 2).sum()          # cost over the whole batch, plus an L2 penalty
gW, gb = T.grad(cost, [W, b])                       # gradients of the cost w.r.t. W and b

# compile the training and prediction functions
learning_rate = 0.1
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent.mean()],
    updates=((W, W - learning_rate * gW), (b, b - learning_rate * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

# training
for i in range(500):
    pred, err = train(D[0], D[1])
    if i % 50 == 0:
        print('cost:', err)
        print("accuracy:", compute_accuracy(D[1], predict(D[0])))

print("target values for D:")
print(D[1])
print("prediction on D:")
print(predict(D[0]))
```
Overfitting
Overfitting happens when a model, in chasing an ever smaller training error, becomes over-confident: it fits the training data so closely that it no longer generalizes to new data.
Remedies:

1. Increase the amount of training data.
2. L1, L2, ... regularization.
3. Dropout regularization (for neural networks); a minimal sketch follows below.
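The regularization example in the next section only demonstrates the L1/L2 penalties. As a hedged sketch of the third remedy (my own illustration, not from the original notes), dropout can be written in Theano with a random binary mask drawn from `RandomStreams`; the names `dropout` and `drop_prob` are introduced here for illustration:

```
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)

def dropout(layer_output, drop_prob=0.5):
    # keep each unit with probability (1 - drop_prob) and rescale the survivors,
    # so the expected activation matches the full network used at test time
    keep_prob = 1.0 - drop_prob
    mask = srng.binomial(n=1, p=keep_prob, size=layer_output.shape,
                         dtype=theano.config.floatX)
    return layer_output * mask / keep_prob
```

During training the mask would be applied to a layer's outputs (for example to `l1.outputs` before feeding `l2`), while prediction uses the untouched outputs.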
Regularization
```
from __future__ import print_function
import theano
from sklearn.datasets import load_boston
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size, )) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)

def minmax_normalization(data):
    xs_max = np.max(data, axis=0)
    xs_min = np.min(data, axis=0)
    xs = (1 - 0) * (data - xs_min) / (xs_max - xs_min) + 0
    return xs

np.random.seed(100)
x_data = load_boston().data
# squash every input feature into the [0, 1] range
x_data = minmax_normalization(x_data)
y_data = load_boston().target[:, np.newaxis]

# split the data: one part for training, one part for testing
x_train, y_train = x_data[:400], y_data[:400]
x_test, y_test = x_data[400:], y_data[400:]

x = T.dmatrix("x")
y = T.dmatrix("y")

l1 = Layer(x, 13, 50, T.tanh)
l2 = Layer(l1.outputs, 50, 1, None)

# different ways to compute the cost
cost = T.mean(T.square(l2.outputs - y))  # no regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * ((l1.W ** 2).sum() + (l2.W ** 2).sum())  # L2 regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * (abs(l1.W).sum() + abs(l2.W).sum())      # L1 regularization
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

learning_rate = 0.01
train = theano.function(
    inputs=[x, y],
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])
compute_cost = theano.function(inputs=[x, y], outputs=cost)

# record how the errors evolve
train_err_list = []
test_err_list = []
learning_time = []

for i in range(1000):
    train(x_train, y_train)
    if i % 10 == 0:
        # record the errors
        train_err_list.append(compute_cost(x_train, y_train))
        test_err_list.append(compute_cost(x_test, y_test))
        learning_time.append(i)

# visualize: training error in red, test error in blue
plt.plot(learning_time, train_err_list, 'r-')
plt.plot(learning_time, test_err_list, 'b--')
plt.show()
```
Save and reload a neural network
```
import pickle

# save
with open('path', 'wb') as file:  # 'path' is wherever the model should be stored
    model = [w.get_value(), b.get_value()]
    pickle.dump(model, file)

# reload
with open('..', 'rb') as file:    # the same path as above
    model = pickle.load(file)
    w.set_value(model[0])
    b.set_value(model[1])
```
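As a concrete, hedged example (the file name `model.pickle` is hypothetical; the variable names follow the two-layer regression network above), all four parameters can be pickled and restored like this:

```
import pickle

# save the trained parameters of the two-layer regression network
with open('model.pickle', 'wb') as file:  # hypothetical file name
    model = [l1.W.get_value(), l1.b.get_value(),
             l2.W.get_value(), l2.b.get_value()]
    pickle.dump(model, file)

# later: rebuild the same graph, then load the parameters back into the shared variables
with open('model.pickle', 'rb') as file:
    model = pickle.load(file)
    l1.W.set_value(model[0])
    l1.b.set_value(model[1])
    l2.W.set_value(model[2])
    l2.b.set_value(model[3])
```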