
Notes from studying 《TensorFlow 实战Google深度学习框架》, 2019-08-12

The draft I spent half a day writing was not saved!!!

The example below is a complete natural language processing training program on the PTB (Penn Treebank) dataset. It uses the TensorFlow 1.x API (tf.placeholder, tf.nn.rnn_cell, tf.Session), and read_data expects the PTB files to have already been converted into word-id form, one integer id per token.
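A quick note on the metric before the code: the "perplexity" that run_epoch prints is just the exponential of the average per-word cross-entropy, computed as np.exp(total_costs / iters). In formula form,

    perplexity = \exp\left(-\frac{1}{N}\sum_{i=1}^{N}\log p(w_i \mid w_1,\dots,w_{i-1})\right)

so lower is better; a perplexity of k roughly means the model is as uncertain at each step as a uniform choice among k words.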

# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

TRAIN_DATA = "ptb.train"        # path to the training data
EVAL_DATA = "ptb.valid"         # path to the validation data
TEST_DATA = "ptb.test"          # path to the test data
HIDDEN_SIZE = 300               # size of the hidden layer
NUM_LAYERS = 2                  # number of LSTM layers in the deep RNN
VOCAB_SIZE = 10000              # vocabulary size
TRAIN_BATCH_SIZE = 20           # batch size for the training data
TRAIN_NUM_STEP = 35             # truncation length for the training data
EVAL_BATCH_SIZE = 1             # batch size for the test data
EVAL_NUM_STEP = 1               # truncation length for the test data
NUM_EPOCH = 5                   # number of passes over the training data
LSTM_KEEP_PROB = 0.9            # probability that an LSTM node is NOT dropped out
EMBEDDING_KEEP_PROB = 0.9       # probability that a word embedding is NOT dropped out
MAX_GRAD_NORM = 5               # upper bound on the gradient norm, to control gradient explosion
SHARE_EMB_AND_SOFTMAX = True    # share parameters between the softmax and embedding layers


# Describe the model with a PTBModel class; this makes it easier to maintain
# the recurrent network's state.
class PTBModel(object):
    def __init__(self, is_training, batch_size, num_steps):
        # Record the batch size and truncation length in use.
        self.batch_size = batch_size    # 20 during training
        self.num_steps = num_steps      # 35 during training

        # Define the input and expected output for each step. Both have shape
        # [batch_size, num_steps], i.e. [20, 35] during training.
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Define a deep RNN with LSTM cells as the recurrent body and dropout.
        dropout_keep_prob = LSTM_KEEP_PROB if is_training else 1.0
        lstm_cells = [
            tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE),
                output_keep_prob=dropout_keep_prob)
            for _ in range(NUM_LAYERS)]
        cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)

        # Initialize the state to all zeros. This is only used for the first
        # batch of each epoch.
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        # Define the word embedding matrix, shape (10000, 300).
        embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])
        # Convert the input word ids to embeddings, shape (20, 35, 300).
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # Apply dropout only during training.
        if is_training:
            inputs = tf.nn.dropout(inputs, EMBEDDING_KEEP_PROB)

        # Define the output list: collect the LSTM outputs from the different
        # time steps first, then feed them to the softmax layer together.
        outputs = []
        state = self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):    # 35 steps
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                cell_output, state = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        # Expand the output list into shape [batch, hidden_size * num_steps],
        # i.e. [20, 300 * 35], then reshape it into
        # [batch * num_steps, hidden_size], i.e. [20 * 35, 300].
        output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])

        # Softmax layer: turn the RNN output at each position into logits
        # over the vocabulary.
        if SHARE_EMB_AND_SOFTMAX:
            weight = tf.transpose(embedding)
        else:
            weight = tf.get_variable("weight", [HIDDEN_SIZE, VOCAB_SIZE])
        bias = tf.get_variable("bias", [VOCAB_SIZE])
        logits = tf.matmul(output, weight) + bias

        # Define the cross-entropy loss and the average cost.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=logits)
        self.cost = tf.reduce_sum(loss) / batch_size
        self.final_state = state

        # Define the backpropagation operations only when training.
        if not is_training:
            return

        trainable_variables = tf.trainable_variables()
        # Clip the gradients, then define the optimizer and the training step.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        self.train_op = optimizer.apply_gradients(
            zip(grads, trainable_variables))


# Run train_op on the data using the given model, and return the perplexity
# over the full data set.
def run_epoch(session, model, batches, train_op, output_log, step):
    # Auxiliary variables for computing the average perplexity.
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    # Train for one epoch.
    for x, y in batches:
        # Run train_op on the current batch and compute the loss. The
        # cross-entropy loss measures the probability of the next word
        # being the given word.
        cost, state, _ = session.run(
            [model.cost, model.final_state, train_op],
            {model.input_data: x, model.targets: y,
             model.initial_state: state})
        total_costs += cost
        iters += model.num_steps    # 35
        # Print a log line only during training.
        if output_log and step % 100 == 0:
            print("After %d steps, perplexity is %.3f" %
                  (step, np.exp(total_costs / iters)))
        step += 1
    # Return the perplexity of the given model on the given data.
    return step, np.exp(total_costs / iters)


# Read the data from a file and return an array of word ids.
def read_data(file_path):
    with open(file_path, "r") as fin:
        # Read the whole document into one long string.
        id_string = ' '.join([line.strip() for line in fin.readlines()])
    # Convert the word ids read above to integers.
    id_list = [int(w) for w in id_string.split()]
    return id_list


def make_batches(id_list, batch_size, num_step):
    # batch_size: number of samples in one batch
    # num_step:   sequence length of one sample
    # Compute the total number of batches; each batch contains
    # batch_size * num_step words.
    num_batches = (len(id_list) - 1) // (batch_size * num_step)

    # Arrange the data into a 2-D array of shape
    # [batch_size, num_batches * num_step].
    data = np.array(id_list[: num_batches * batch_size * num_step])
    data = np.reshape(data, [batch_size, num_batches * num_step])
    # Split the data along the second dimension into num_batches batches
    # and store them in an array.
    data_batches = np.split(data, num_batches, axis=1)

    # Repeat the operation above, but with every position shifted one step
    # to the right; this yields the next word that the RNN must predict at
    # each output step.
    label = np.array(id_list[1: num_batches * batch_size * num_step + 1])
    label = np.reshape(label, [batch_size, num_batches * num_step])
    label_batches = np.split(label, num_batches, axis=1)
    # Return an array of length num_batches, where each item holds one data
    # matrix and one label matrix.
    return list(zip(data_batches, label_batches))


def main():
    # Define the initializer.
    initializer = tf.random_uniform_initializer(-0.05, 0.05)

    # Define the RNN model used for training.
    with tf.variable_scope("language_model",
                           reuse=None, initializer=initializer):
        train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)

    # Define the RNN model used for evaluation. It shares parameters with
    # train_model but has no dropout.
    with tf.variable_scope("language_model",
                           reuse=True, initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

    # Train the model.
    with tf.Session() as session:
        tf.global_variables_initializer().run()
        train_batches = make_batches(
            read_data(TRAIN_DATA), TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)
        eval_batches = make_batches(
            read_data(EVAL_DATA), EVAL_BATCH_SIZE, EVAL_NUM_STEP)
        test_batches = make_batches(
            read_data(TEST_DATA), EVAL_BATCH_SIZE, EVAL_NUM_STEP)

        step = 0
        for i in range(NUM_EPOCH):    # 5 epochs in total
            print("In iteration: %d" % (i + 1))
            step, train_pplx = run_epoch(session, train_model, train_batches,
                                         train_model.train_op, True, step)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_pplx))
            _, eval_pplx = run_epoch(session, eval_model, eval_batches,
                                     tf.no_op(), False, 0)
            print("Epoch: %d Eval Perplexity: %.3f" % (i + 1, eval_pplx))

        _, test_pplx = run_epoch(session, eval_model, test_batches,
                                 tf.no_op(), False, 0)
        print("Test Perplexity: %.3f" % test_pplx)


if __name__ == '__main__':
    main()
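To see concretely what make_batches returns, here is a small sanity check (a made-up toy example, not part of the program above); it assumes the definitions above have been run and uses ids 0 through 12 with batch_size=2 and num_step=3:

ids = list(range(13))    # [0, 1, ..., 12]; num_batches = (13 - 1) // (2 * 3) = 2
batches = make_batches(ids, batch_size=2, num_step=3)
for x, y in batches:
    print(x)    # inputs,  shape [2, 3]
    print(y)    # targets, shape [2, 3]: the inputs shifted right by one id
# First batch:  x = [[0 1 2]      y = [[1 2 3]
#                    [6 7 8]]          [7 8 9]]
# Second batch: x = [[3  4  5]    y = [[ 4  5  6]
#                    [9 10 11]]        [10 11 12]]

Each label matrix is the corresponding data matrix shifted one position to the right, so at every time step the model is trained to predict the next word id in the sequence.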


Reposted from: https://www.cnblogs.com/beautifulchenxi/p/11342115.html
