深度学习之LSTM案例分析（三）-白红宇

深度学习之LSTM案例分析（三）

阅读量：181 次

发布时间：2019-02-28

本文共 11538 字，大约阅读时间需要 38 分钟。

#背景

代码来自GitHub上《TensorFlow Machine Learning Cookbook》的示例代码仓库（nfmcclure/tensorflow_cookbook）。

We stack multiple LSTM layers to improve on our Shakespeare language generation. (Character level vocabulary)

堆叠多个LSTM层以改进我们的莎士比亚语言生成。（字符级词汇）

此内容为《深度学习之LSTM案例分析（二）》的补充。

#代码

# Stacking LSTM Layers
# ---------------------
# Here we implement an LSTM model on a data set of Shakespeare's works.
# We stack multiple LSTM layers for a more accurate representation of
# Shakespearean language, and we use characters instead of words as the
# vocabulary.
#
# NOTE: this script is written against the TensorFlow 1.x graph API
# (tf.Session, tf.placeholder, tf.contrib).

import collections
import os
import pickle
import random
import re
import string

import matplotlib.pyplot as plt
import numpy as np
import requests
import tensorflow as tf
from tensorflow.python.framework import ops

ops.reset_default_graph()

# Start a computational graph session.
sess = tf.Session()

# Set RNN parameters
num_layers = 3  # Number of RNN layers stacked
min_word_freq = 5  # Trim the less frequent words off (not referenced below)
rnn_size = 128  # RNN model size, has to equal embedding size
epochs = 10  # Number of epochs to cycle through data
batch_size = 100  # Train on this many examples at once
learning_rate = 0.0005  # Learning rate
training_seq_len = 50  # How long of a character group to consider
save_every = 500  # How often (in iterations) to save model checkpoints
eval_every = 50  # How often (in iterations) to evaluate the test sentences
prime_texts = ['thou art more', 'to be or not to', 'wherefore art thou']

# Download/store Shakespeare data
data_dir = 'temp'
data_file = 'shakespeare.txt'
model_path = 'shakespeare_model'
full_model_dir = os.path.join(data_dir, model_path)
data_path = os.path.join(data_dir, data_file)

# Declare punctuation to remove: everything except hyphens and apostrophes
punctuation = string.punctuation
punctuation = ''.join([x for x in punctuation if x not in ['-', "'"]])

# Make the model directory and the data directory.
# exist_ok avoids the separate existence check (and the race it implies).
os.makedirs(full_model_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

# ---------------------------------------------------------------------------
# Load the Shakespeare data
# ---------------------------------------------------------------------------
print('Loading Shakespeare Data')

# Check if file is downloaded.
if not os.path.isfile(data_path):
    print('Not found, downloading Shakespeare texts from www.gutenberg.org')
    shakespeare_url = 'http://www.gutenberg.org/cache/epub/100/pg100.txt'
    # Get Shakespeare text
    response = requests.get(shakespeare_url)
    # Fail loudly on an HTTP error instead of training on an error page.
    response.raise_for_status()
    shakespeare_file = response.content
    # Decode binary into string
    s_text = shakespeare_file.decode('utf-8')
    # Drop first few descriptive paragraphs (fixed character offset).
    s_text = s_text[7675:]
    # Remove newlines
    s_text = s_text.replace('\r\n', '')
    s_text = s_text.replace('\n', '')

    # Write to file. The text was decoded as UTF-8, so store it as UTF-8
    # rather than relying on the platform default encoding.
    with open(data_path, 'w', encoding='utf-8') as out_conn:
        out_conn.write(s_text)
else:
    # If file has been saved, load from that file
    with open(data_path, 'r', encoding='utf-8') as file_conn:
        s_text = file_conn.read().replace('\n', '')

# The article's recorded output shows this line after loading.
print('Done Loading Data.')

# Output recorded in the original article:
#   Loading Shakespeare Data
#   Not found, downloading Shakespeare texts from www.gutenberg.org
#   Done Loading Data.

# ---------------------------------------------------------------------------
# Clean and split the text data
# ---------------------------------------------------------------------------
print('Cleaning Text')
# re.escape keeps characters such as '\', ']' and '^' literal inside the
# character class; unescaped, the '\]' pair was read as an escaped bracket
# and the backslash itself was never stripped.
s_text = re.sub(r'[{}]'.format(re.escape(punctuation)), ' ', s_text)
s_text = re.sub(r'\s+', ' ', s_text).strip().lower()

# Split up by characters
char_list = list(s_text)

# Output recorded in the original article:
#   Cleaning Text


# ---------------------------------------------------------------------------
# Build the character vocabulary and transform the text
# ---------------------------------------------------------------------------
def build_vocab(characters):
    """Build index<->character lookup tables from an iterable of characters.

    Each distinct character gets an index starting at 1, in the iteration
    order of the ``collections.Counter`` built from ``characters``. Index 0
    is reserved for the key ``'unknown'``.

    Args:
        characters: iterable of hashable items (here: single characters).

    Returns:
        Tuple ``(ix_to_vocab_dict, vocab_to_ix_dict)``.
    """
    character_counts = collections.Counter(characters)
    # Create vocab --> index mapping (indices start at 1)
    vocab_to_ix_dict = {
        key: inx for inx, key in enumerate(character_counts, start=1)
    }
    # Add unknown key --> 0 index
    vocab_to_ix_dict['unknown'] = 0
    # Create index --> vocab mapping
    ix_to_vocab_dict = {val: key for key, val in vocab_to_ix_dict.items()}
    return ix_to_vocab_dict, vocab_to_ix_dict


# Build Shakespeare vocabulary
print('Building Shakespeare Vocab by Characters')
ix2vocab, vocab2ix = build_vocab(char_list)
vocab_size = len(ix2vocab)
print('Vocabulary Length = {}'.format(vocab_size))
# Sanity check
assert len(ix2vocab) == len(vocab2ix)

# Output recorded in the original article:
#   Building Shakespeare Vocab by Characters
#   Vocabulary Length = 40

# Convert text to an array of character indices; characters missing from the
# vocabulary map to 0 (the 'unknown' index).
s_text_ix = np.array([vocab2ix.get(x, 0) for x in char_list])


# Define LSTM RNN Model Class
class LSTM_Model():
    """Stacked character-level LSTM built on the TensorFlow 1.x graph API.

    Constructing an instance adds the model's ops to the default graph:
    an embedding lookup, ``num_layers`` stacked LSTM cells run through
    ``legacy_seq2seq.rnn_decoder``, a softmax output layer, the loss and an
    Adam training op with gradient clipping.

    Args:
        rnn_size: size of each LSTM cell and of the embedding vectors.
        num_layers: number of LSTM layers to stack.
        batch_size: examples per batch (forced to 1 when ``infer_sample``).
        learning_rate: learning rate passed to the Adam optimizer.
        training_seq_len: characters per example (forced to 1 when
            ``infer_sample``).
        vocab_size: number of entries in the vocabulary.
        infer_sample: if True, build the graph for one-character-at-a-time
            sampling (batch size 1, sequence length 1).
    """

    def __init__(self, rnn_size, num_layers, batch_size, learning_rate,
                 training_seq_len, vocab_size, infer_sample=False):
        self.rnn_size = rnn_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.infer_sample = infer_sample
        self.learning_rate = learning_rate

        if infer_sample:
            self.batch_size = 1
            self.training_seq_len = 1
        else:
            self.batch_size = batch_size
            self.training_seq_len = training_seq_len

        # Added for the stacked model: one *separate* BasicLSTMCell per layer.
        # Passing the same cell object num_layers times does not give
        # independent layers (depending on the TF 1.x release it either ties
        # the weights of all layers or raises a variable-scope reuse error).
        layer_cells = [tf.contrib.rnn.BasicLSTMCell(self.rnn_size)
                       for _ in range(self.num_layers)]
        self.lstm_cell = tf.contrib.rnn.MultiRNNCell(layer_cells)
        self.initial_state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

        self.x_data = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])
        self.y_output = tf.placeholder(tf.int32, [self.batch_size, self.training_seq_len])

        with tf.variable_scope('lstm_vars'):
            # Softmax output weights
            W = tf.get_variable('W', [self.rnn_size, self.vocab_size],
                                tf.float32, tf.random_normal_initializer())
            b = tf.get_variable('b', [self.vocab_size],
                                tf.float32, tf.constant_initializer(0.0))

            # Define embedding
            embedding_mat = tf.get_variable('embedding_mat',
                                            [self.vocab_size, self.rnn_size],
                                            tf.float32, tf.random_normal_initializer())

            embedding_output = tf.nn.embedding_lookup(embedding_mat, self.x_data)
            # Split along the sequence axis into one tensor per time step,
            # then drop the length-1 sequence axis of each piece.
            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.training_seq_len,
                                  value=embedding_output)
            rnn_inputs_trimmed = [tf.squeeze(x, [1]) for x in rnn_inputs]

        decoder = tf.contrib.legacy_seq2seq.rnn_decoder
        outputs, last_state = decoder(rnn_inputs_trimmed,
                                      self.initial_state,
                                      self.lstm_cell)

        # RNN outputs, flattened to one row per (example, time step)
        output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
        # Logits and output
        self.logit_output = tf.matmul(output, W) + b
        self.model_output = tf.nn.softmax(self.logit_output)

        loss_fun = tf.contrib.legacy_seq2seq.sequence_loss_by_example
        # The original passed vocab_size as a 4th positional argument; in the
        # TF 1.x signature that slot is `average_across_timesteps` (default
        # True), so the truthy value was equivalent to the default.
        loss = loss_fun([self.logit_output],
                        [tf.reshape(self.y_output, [-1])],
                        [tf.ones([self.batch_size * self.training_seq_len])])
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.training_seq_len)
        self.final_state = last_state
        # Clip gradients by global norm before applying them.
        gradients, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tf.trainable_variables()), 4.5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients, tf.trainable_variables()))

    def sample(self, sess, words=ix2vocab, vocab=vocab2ix, num=20, prime_text='thou art'):
        """Generate text greedily, one character at a time.

        Intended for an instance built with ``infer_sample=True``: the method
        feeds a (1, 1) input at every step.

        Args:
            sess: TensorFlow session holding the model variables.
            words: index -> character mapping.
            vocab: character -> index mapping.
            num: maximum number of characters to append.
            prime_text: non-empty text used to warm up the LSTM state.

        Returns:
            ``prime_text`` followed by up to ``num`` generated characters;
            generation stops early if index 0 ('unknown') is predicted.

        Raises:
            ValueError: if ``prime_text`` is empty.
        """
        if not prime_text:
            raise ValueError('prime_text must contain at least one character')

        # Evaluate the zero state already in the graph instead of calling
        # zero_state() again, which would add new ops on every call.
        state = sess.run(self.initial_state)
        prime_chars = list(prime_text)

        # Warm up the state on all but the last priming character.
        for char in prime_chars[:-1]:
            x = np.zeros((1, 1))
            # Unknown characters map to index 0, as in the training data.
            x[0, 0] = vocab.get(char, 0)
            feed_dict = {self.x_data: x, self.initial_state: state}
            [state] = sess.run([self.final_state], feed_dict=feed_dict)

        out_sentence = prime_text
        char = prime_chars[-1]
        for _ in range(num):
            x = np.zeros((1, 1))
            x[0, 0] = vocab.get(char, 0)
            feed_dict = {self.x_data: x, self.initial_state: state}
            [model_output, state] = sess.run([self.model_output, self.final_state],
                                             feed_dict=feed_dict)
            # Greedy choice: most probable next character.
            next_ix = np.argmax(model_output[0])
            if next_ix == 0:
                break
            char = words[next_ix]
            out_sentence = out_sentence + char
        return out_sentence


# ---------------------------------------------------------------------------
# Initialize the LSTM model
# ---------------------------------------------------------------------------
lstm_model = LSTM_Model(rnn_size,
                        num_layers,
                        batch_size,
                        learning_rate,
                        training_seq_len,
                        vocab_size)

# Tell TensorFlow we are reusing the scope for the testing model
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    test_lstm_model = LSTM_Model(rnn_size,
                                 num_layers,
                                 batch_size,
                                 learning_rate,
                                 training_seq_len,
                                 vocab_size,
                                 infer_sample=True)

# Create model saver
saver = tf.train.Saver(tf.global_variables())

# Create batches for each epoch
num_batches = int(len(s_text_ix) / (batch_size * training_seq_len)) + 1
# Split up text indices into num_batches subarrays of near-equal size
batches = np.array_split(s_text_ix, num_batches)
# Reshape each split into [batch_size, training_seq_len]
# (np.resize repeats data when a split is shorter than the target shape).
batches = [np.resize(x, [batch_size, training_seq_len]) for x in batches]

# Initialize all variables
init = tf.global_variables_initializer()
sess.run(init)

# Train model
train_loss = []
iteration_count = 1
for epoch in range(epochs):
    # Shuffle the order of the batches
    random.shuffle(batches)
    # Create targets from shuffled batches: each row shifted left by one
    # (np.roll wraps, so a row's last target is that row's first character).
    targets = [np.roll(x, -1, axis=1) for x in batches]
    # Run through one epoch
    print('Starting Epoch #{} of {}.'.format(epoch + 1, epochs))
    # Reset initial LSTM state every epoch
    state = sess.run(lstm_model.initial_state)
    for ix, batch in enumerate(batches):
        training_dict = {lstm_model.x_data: batch, lstm_model.y_output: targets[ix]}
        # Carry the previous final state into each layer's initial (c, h).
        for i, (c, h) in enumerate(lstm_model.initial_state):
            training_dict[c] = state[i].c
            training_dict[h] = state[i].h

        temp_loss, state, _ = sess.run(
            [lstm_model.cost, lstm_model.final_state, lstm_model.train_op],
            feed_dict=training_dict)
        train_loss.append(temp_loss)

        # Print status every 10 iterations
        if iteration_count % 10 == 0:
            # There are exactly num_batches batches per epoch.
            summary_nums = (iteration_count, epoch + 1, ix + 1, num_batches, temp_loss)
            print('Iteration: {}, Epoch: {}, Batch: {} out of {}, Loss: {:.2f}'.format(*summary_nums))

        # Save the model and the vocab
        if iteration_count % save_every == 0:
            # Save model
            model_file_name = os.path.join(full_model_dir, 'model')
            saver.save(sess, model_file_name, global_step=iteration_count)
            print('Model Saved To: {}'.format(model_file_name))
            # Save vocabulary
            dictionary_file = os.path.join(full_model_dir, 'vocab.pkl')
            with open(dictionary_file, 'wb') as dict_file_conn:
                pickle.dump([vocab2ix, ix2vocab], dict_file_conn)

        # Print a short generated continuation for every priming text
        if iteration_count % eval_every == 0:
            for prime in prime_texts:
                print(test_lstm_model.sample(sess, ix2vocab, vocab2ix, num=10, prime_text=prime))

        iteration_count += 1

# Plot loss over time
plt.plot(train_loss, 'k-')
plt.title('Sequence to Sequence Loss')
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.show()

（Placeholder）

转载地址：http://bvqn.baihongyu.com/

你可能感兴趣的文章

MySql中怎样使用case-when实现判断查询结果返回

查看>>

Mysql中怎样使用update更新某列的数据减去指定值

查看>>

Mysql中怎样设置指定ip远程访问连接