A Hands-On Guide to Building a Chinese Chatbot with a ModelArts Model | [Huawei Cloud AI New Year Celebration]
I. Enter the Lab
1. Open the Chinese chatbot lab page via the entry link:
https://developer.huaweicloud.com/develop/aigallery/notebook/detail?id=16e6b382-d189-47d9-8b46-5fccd5181544
2. Click "Run in ModelArts" to open the JupyterLab page.
3. In the upper-right corner of the lab window, select TensorFlow-1.13.1 as the runtime environment.
4. Click the "Switch Specification" button on the right and switch to the hardware specification required by the lab; if the default environment already matches the required specification, no switch is needed.
II. Run the Lab
1. Download the code
(1) Step 1 code
import os
import moxing as mox

if not os.path.exists('./Chinese-ChatBot-master'):
    mox.file.copy('obs://obs-aigallery-zc/hyx/Chinese-ChatBot/Chinese-ChatBot-master.zip', 'Chinese-ChatBot-master.zip')
    os.system('unzip Chinese-ChatBot-master.zip -d ./')
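If you want to confirm that the download and extraction succeeded before moving on, a minimal check such as the following can be run in a new cell (it assumes the archive unpacks into a Chinese-ChatBot-master directory, as in the copy step above):

import os

# Optional sanity check: list the extracted project files
if os.path.isdir('./Chinese-ChatBot-master'):
    print(sorted(os.listdir('./Chinese-ChatBot-master')))
else:
    print('Download or unzip did not finish - re-run the cell above.')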
(2) Step 2 code
%cd ./Chinese-ChatBot-master/
(3) Output of steps 1 and 2
2. Set up the environment
(1) Step 3 code
import os
os.system('pip install pydot')
(2) Step 4 code
import os
import numpy as np
import pickle
import operator
import pandas as pd
import jieba
from language.langconv import *
(3) Output of steps 3 and 4
3. Obtain the dataset
(1) Step 5 code
def Traditional2Simplified(sentence):
    sentence = Converter('zh-hans').convert(sentence)
    return sentence

def is_all_chinese(strs):
    for chart in strs:
        if chart < u'\u4e00' or chart > u'\u9fff':
            return False
    return True

with open('qingyun.tsv', 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

lines = lines[:-2]
question = []
answer = []
for pos, line in enumerate(lines):
    if '\t' not in line:
        print(line)
        continue  # skip malformed lines without a tab separator
    line = line.split('\t')
    q = line[0].strip()
    a = line[1].strip()
    question.append(' '.join(jieba.lcut(Traditional2Simplified(q).strip(), cut_all=False)))
    answer.append(' '.join(jieba.lcut(Traditional2Simplified(a).strip(), cut_all=False)))
print(len(question))
# print(answer[:10])
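To get a feel for what this preprocessing does, you can run the conversion and segmentation on a single sentence. This is just an illustrative sketch reusing the functions defined above; the exact segmentation may differ slightly depending on your jieba version:

sample = '我喜歡你'                             # Traditional Chinese input
simplified = Traditional2Simplified(sample)     # converted to Simplified Chinese
tokens = jieba.lcut(simplified, cut_all=False)  # word segmentation
print(simplified)                               # 我喜欢你
print(' '.join(tokens))                         # typically: 我 喜欢 你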
(2) Output of step 5
(3) Step 6 code
def is_all_chinese(strs):
    for chart in strs:
        if chart < u'\u4e00' or chart > u'\u9fff':
            return False
    return True

character = set()
for seq in question + answer:
    word_list = seq.split(' ')
    for word in word_list:
        if not is_all_chinese(word):
            character.add(word)

def is_pure_english(keyword):
    return all(ord(c) < 128 for c in keyword)

character = list(character)
stop_words = set()
for pos, word in enumerate(character):
    if not is_pure_english(word):
        stop_words.add(word)
print('stop_words: ', len(stop_words))
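The two helpers split tokens into three groups: pure Chinese tokens are kept, pure-ASCII tokens (English words, digits) are kept as well, and everything else (emoticons, full-width punctuation, mixed tokens) ends up in stop_words. A quick illustrative check, assuming the cell above has been run:

for token in ['你好', 'hello', '2333', '^_^', '：）']:
    print(token, is_all_chinese(token), is_pure_english(token))
# Only tokens for which both checks return False are added to stop_words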
(4) Output of step 6
(5) Step 7 code
maxLen = 18

for pos, seq in enumerate(question):
    seq_list = seq.split(' ')
    for epoch in range(3):
        for pos_, word in enumerate(seq_list):
            if word in stop_words:
                seq_list.pop(pos_)
    if len(seq_list) > maxLen:
        seq_list = seq_list[:maxLen]
    question[pos] = ' '.join(seq_list)

for pos, seq in enumerate(answer):
    seq_list = seq.split(' ')
    for epoch in range(3):
        for pos_, word in enumerate(seq_list):
            if word in stop_words:
                seq_list.pop(pos_)
    if len(seq_list) > maxLen:
        seq_list = seq_list[:maxLen]
    answer[pos] = ' '.join(seq_list)

answer_a = ['BOS ' + i + ' EOS' for i in answer]
answer_b = [i + ' EOS' for i in answer]
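Note that seq_list.pop(pos_) inside enumerate(seq_list) removes items from the list being iterated, which skips the element that slides into the freed position; that is why the inner loop is repeated three times. An equivalent single-pass alternative (just a sketch, not part of the lab code) would be:

def filter_seq(seq, stop_words, max_len=18):
    # keep only tokens that are not stop words, then truncate to max_len
    seq_list = [w for w in seq.split(' ') if w not in stop_words]
    return ' '.join(seq_list[:max_len])

# question = [filter_seq(q, stop_words) for q in question]
# answer   = [filter_seq(a, stop_words) for a in answer]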
(6) Step 8 code
import pickle

counts = {}
BE = ['BOS', 'EOS']
for word_list in question + answer + BE:
    for word in word_list.split(' '):
        counts[word] = counts.get(word, 0) + 1

word_to_index = {}
for pos, i in enumerate(counts.keys()):
    word_to_index[i] = pos

index_to_word = {}
for pos, i in enumerate(counts.keys()):
    index_to_word[pos] = i

vocab_bag = list(word_to_index.keys())
with open('word_to_index.pkl', 'wb') as f:
    pickle.dump(word_to_index, f, pickle.HIGHEST_PROTOCOL)
with open('index_to_word.pkl', 'wb') as f:
    pickle.dump(index_to_word, f, pickle.HIGHEST_PROTOCOL)
with open('vocab_bag.pkl', 'wb') as f:
    pickle.dump(vocab_bag, f, pickle.HIGHEST_PROTOCOL)
print('vocab_bag: ', len(vocab_bag))
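word_to_index and index_to_word are inverse mappings over the same vocabulary, so a lookup should round-trip. A quick sanity check (a sketch, assuming the cell above has been run):

word = 'BOS'
idx = word_to_index[word]
print(idx, index_to_word[idx])   # the second value should be 'BOS' again
print('EOS' in vocab_bag)        # True: the special tokens are part of the vocabulary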
(7) Output of step 8
(8) Step 9 code
question = np.array([[word_to_index[w] for w in i.split(' ')] for i in question])
answer_a = np.array([[word_to_index[w] for w in i.split(' ')] for i in answer_a])
answer_b = np.array([[word_to_index[w] for w in i.split(' ')] for i in answer_b])
print('question: ', len(question), '\n', 'answer: ', len(answer))
np.save('question.npy', question[:100000])
np.save('answer_a.npy', answer_a[:100000])
np.save('answer_b.npy', answer_b[:100000])
print('Done!')
(9) Output of step 9
(10) Step 10 code
question = np.load('question.npy', allow_pickle=True)
answer_a = np.load('answer_a.npy', allow_pickle=True)
answer_b = np.load('answer_b.npy', allow_pickle=True)
print('answer_a.shape: ', answer_a.shape)

with open('word_to_index.pkl', 'rb') as f:
    word_to_index = pickle.load(f)

for i, j in word_to_index.items():
    word_to_index[i] = j + 1

index_to_word = {}
for key, value in word_to_index.items():
    index_to_word[value] = key

pad_question = question
pad_answer_a = answer_a
pad_answer_b = answer_b
maxLen = 20

for pos, i in enumerate(pad_question):
    for pos_, j in enumerate(i):
        i[pos_] = j + 1
    if(len(i) > maxLen):
        pad_question[pos] = i[:maxLen]
for pos, i in enumerate(pad_answer_a):
    for pos_, j in enumerate(i):
        i[pos_] = j + 1
    if(len(i) > maxLen):
        pad_answer_a[pos] = i[:maxLen]
for pos, i in enumerate(pad_answer_b):
    for pos_, j in enumerate(i):
        i[pos_] = j + 1
    if(len(i) > maxLen):
        pad_answer_b[pos] = i[:maxLen]
np.save('answer_o.npy', pad_answer_b)

with open('vocab_bag.pkl', 'rb') as f:
    words = pickle.load(f)

vocab_size = len(word_to_index) + 1
print('word_to_vec_map: ', len(list(words)))
print('vocab_size: ', vocab_size)

from keras.preprocessing import sequence
# post-padding: pad/truncate at the end of each sequence
pad_question = sequence.pad_sequences(pad_question, maxlen=maxLen,
                                      dtype='int32', padding='post',
                                      truncating='post')
pad_answer = sequence.pad_sequences(pad_answer_a, maxlen=maxLen,
                                    dtype='int32', padding='post',
                                    truncating='post')

def get_file_list(file_path):
    dir_list = os.listdir(file_path)
    if not dir_list:
        return
    else:
        dir_list = sorted(dir_list, key=lambda x: os.path.getmtime(os.path.join(file_path, x)))
        return dir_list

with open('pad_word_to_index.pkl', 'wb') as f:
    pickle.dump(word_to_index, f, pickle.HIGHEST_PROTOCOL)
with open('pad_index_to_word.pkl', 'wb') as f:
    pickle.dump(index_to_word, f, pickle.HIGHEST_PROTOCOL)
np.save('pad_question.npy', pad_question)
np.save('pad_answer.npy', pad_answer)
print(pad_answer[:3])
print(pad_answer_b[:3])
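Two details of step 10 are worth spelling out: every word index is shifted by +1 so that index 0 is free to serve as the padding value (the Embedding layer built later uses mask_zero=True for exactly this reason), and pad_sequences with padding='post'/truncating='post' pads or cuts at the end of each sequence. A toy illustration of the padding behaviour:

from keras.preprocessing import sequence
toy = [[3, 7, 2]]
padded = sequence.pad_sequences(toy, maxlen=6, dtype='int32',
                                padding='post', truncating='post')
print(padded)   # [[3 7 2 0 0 0]] - zeros are appended, which is why index 0 must stay unused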
(11) Output of step 10
4. Train the model
(1) Step 11 code
main_path = './'
question = np.load(main_path + 'pad_question.npy')
answer = np.load(main_path + 'pad_answer.npy')
answer_o = np.load(main_path + 'answer_o.npy', allow_pickle=True)
with open(main_path + 'vocab_bag.pkl', 'rb') as f:
    words = pickle.load(f)
with open(main_path + 'pad_word_to_index.pkl', 'rb') as f:
    word_to_index = pickle.load(f)
with open(main_path + 'pad_index_to_word.pkl', 'rb') as f:
    index_to_word = pickle.load(f)
vocab_size = len(word_to_index) + 1
maxLen = 20

def get_file_list(file_path):
    dir_list = os.listdir(file_path)
    if not dir_list:
        return
    else:
        dir_list = sorted(dir_list, key=lambda x: os.path.getmtime(os.path.join(file_path, x)))
        return dir_list
(2) Step 12 code
from keras.preprocessing import sequence

def generate_train(batch_size):
    print('\n*********************************generate_train()*********************************')
    steps = 0
    question_ = question
    answer_ = answer
    while True:
        batch_answer_o = answer_o[steps:steps+batch_size]
        batch_question = question_[steps:steps+batch_size]
        batch_answer = answer_[steps:steps+batch_size]
        outs = np.zeros([batch_size, maxLen, vocab_size], dtype='float32')
        for pos, i in enumerate(batch_answer_o):
            for pos_, j in enumerate(i):
                if pos_ > 20:
                    print(i)
                outs[pos, pos_, j] = 1  # one-hot
        yield [batch_question, batch_answer], outs
        steps += batch_size
        if steps == 100000:
            steps = 0
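The generator yields batches of ([question, decoder input], one-hot targets): the decoder input is the answer prefixed with BOS, and the target is the same answer shifted by one position and ending with EOS, one-hot encoded over the vocabulary. If you want to check the shapes before training, a throwaway generator instance can be inspected (a sketch; it only reads the first batch):

gen = generate_train(batch_size=4)
(batch_question_, batch_answer_), outs_ = next(gen)
print(batch_question_.shape)   # expected (4, 20)
print(batch_answer_.shape)     # expected (4, 20)
print(outs_.shape)             # expected (4, 20, vocab_size)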
(3) Step 13 code
from keras.layers import Embedding
from keras.layers import Input, Dense, LSTM, TimeDistributed, Bidirectional, Dropout, Concatenate, RepeatVector, Activation, Dot
from keras.layers import concatenate, dot
from keras.models import Model
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.initializers import TruncatedNormal
import pydot
import os, re

truncatednormal = TruncatedNormal(mean=0.0, stddev=0.05)
embed_layer = Embedding(input_dim=vocab_size,
                        output_dim=100,
                        mask_zero=True,
                        input_length=None,
                        embeddings_initializer=truncatednormal)
# embed_layer.build((None,))
LSTM_encoder = LSTM(512,
                    return_sequences=True,
                    return_state=True,
                    # activation='relu',
                    # dropout=0.25,
                    # recurrent_dropout=0.1,
                    kernel_initializer='lecun_uniform',
                    name='encoder_lstm'
                    )
LSTM_decoder = LSTM(512,
                    return_sequences=True,
                    return_state=True,
                    # activation='relu',
                    # dropout=0.25,
                    # recurrent_dropout=0.1,
                    kernel_initializer='lecun_uniform',
                    name='decoder_lstm'
                    )

# encoder input and decoder input
input_question = Input(shape=(None, ), dtype='int32', name='input_question')
input_answer = Input(shape=(None, ), dtype='int32', name='input_answer')
input_question_embed = embed_layer(input_question)
input_answer_embed = embed_layer(input_answer)

encoder_lstm, question_h, question_c = LSTM_encoder(input_question_embed)
decoder_lstm, _, _ = LSTM_decoder(input_answer_embed,
                                  initial_state=[question_h, question_c])

attention = dot([decoder_lstm, encoder_lstm], axes=[2, 2])
attention = Activation('softmax')(attention)
context = dot([attention, encoder_lstm], axes=[2, 1])
decoder_combined_context = concatenate([context, decoder_lstm])
# output = dense1(decoder_combined_context)
# output = dense2(Dropout(0.5)(output))
# Has another weight + tanh layer as described in equation (5) of the paper
decoder_dense1 = TimeDistributed(Dense(256, activation="tanh"))
decoder_dense2 = TimeDistributed(Dense(vocab_size, activation="softmax"))
output = decoder_dense1(decoder_combined_context)  # equation (5) of the paper
output = decoder_dense2(output)  # equation (6) of the paper
model = Model([input_question, input_answer], output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

filepath = main_path + "models/W-" + "-{epoch:3d}-{loss:.4f}-.h5"
checkpoint = ModelCheckpoint(filepath,
                             monitor='loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min',
                             period=1,
                             save_weights_only=True
                             )
reduce_lr = ReduceLROnPlateau(monitor='loss',
                              factor=0.2,
                              patience=2,
                              verbose=1,
                              mode='min',
                              min_delta=0.0001,
                              cooldown=0,
                              min_lr=0
                              )
tensorboard = TensorBoard(log_dir=main_path + 'logs',
                          # histogram_freq=0,
                          batch_size=64
                          # write_graph=True,
                          # write_grads=True,
                          # write_images=True,
                          # embeddings_freq=0,
                          # embeddings_layer_names=None,
                          # embeddings_metadata=None,
                          # embeddings_data=None,
                          # update_freq='epoch'
                          )
callbacks_list = [checkpoint, reduce_lr, tensorboard]

initial_epoch_ = 0
file_list = os.listdir(main_path + 'models/')
if len(file_list) > 0:
    epoch_list = get_file_list(main_path + 'models/')
    epoch_last = epoch_list[-1]
    model.load_weights(main_path + 'models/' + epoch_last)
    print("**********checkpoint_loaded: ", epoch_last)
    initial_epoch_ = int(epoch_last.split('-')[2]) - 1
    print('**********Begin from epoch: ', str(initial_epoch_))

model.fit_generator(generate_train(batch_size=100),
                    steps_per_epoch=200,  # (total samples) / batch_size = 100000 / 100 = 1000; only 200 steps per epoch are used here
                    epochs=190,  # number of training epochs
                    verbose=1,
                    callbacks=callbacks_list,
                    # validation_data=generate_test(batch_size=100),
                    # validation_steps=200,  # 10000 / 100 = 100
                    class_weight=None,
                    max_queue_size=5,
                    workers=1,
                    use_multiprocessing=False,
                    shuffle=False,
                    initial_epoch=initial_epoch_
                    )
model.summary()
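The attention wired in above is Luong-style dot-product attention: each decoder state is scored against every encoder state with a dot product, the scores are turned into weights by a softmax, and the weighted sum of encoder states becomes the context vector that is concatenated with the decoder output. A minimal numpy sketch of the same computation for a single example (batch dimension dropped, hypothetical sizes):

import numpy as np

T_dec, T_enc, d = 3, 5, 4                     # decoder steps, encoder steps, hidden size
decoder_states = np.random.rand(T_dec, d)
encoder_states = np.random.rand(T_enc, d)

scores = decoder_states @ encoder_states.T    # dot([decoder_lstm, encoder_lstm], axes=[2, 2])
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)   # Activation('softmax')
context = weights @ encoder_states            # dot([attention, encoder_lstm], axes=[2, 1])
print(context.shape)                          # (3, 4): one context vector per decoder step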
(4) Training output of step 13 (training takes quite a while here, so please wait)
5. Test the model
(1) Step 14 code
question = np.load('pad_question.npy', allow_pickle=True)
answer = np.load('pad_answer.npy', allow_pickle=True)
answer_o = np.load('answer_o.npy', allow_pickle=True)
with open('vocab_bag.pkl', 'rb') as f:
    words = pickle.load(f)
with open('pad_word_to_index.pkl', 'rb') as f:
    word_to_index = pickle.load(f)
with open('pad_index_to_word.pkl', 'rb') as f:
    index_to_word = pickle.load(f)
vocab_size = len(word_to_index) + 1
maxLen = 20
(2) Step 15 code
from keras.preprocessing import sequence

def generate_train(batch_size):
    print('\n*********************************generate_train()*********************************')
    steps = 0
    question_ = question
    answer_ = answer
    while True:
        batch_answer_o = answer_o[steps:steps+batch_size]
        batch_question = question_[steps:steps+batch_size]
        batch_answer = answer_[steps:steps+batch_size]
        outs = np.zeros([batch_size, maxLen, vocab_size], dtype='float32')
        for pos, i in enumerate(batch_answer_o):
            for pos_, j in enumerate(i):
                if pos_ > 20:
                    print(i)
                outs[pos, pos_, j] = 1  # one-hot
        yield [batch_question, batch_answer], outs
        steps += batch_size
        if steps == 100000:
            steps = 0
(3) Step 16 code
from keras.layers import Embedding
from keras.layers import Input, Dense, LSTM, TimeDistributed, Bidirectional, Dropout, Concatenate, RepeatVector, Activation, Dot
from keras.layers import concatenate, dot
from keras.models import Model
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.initializers import TruncatedNormal
import pydot
import os, re
(4) Step 17 code
truncatednormal = TruncatedNormal(mean=0.0, stddev=0.05)
embed_layer = Embedding(input_dim=vocab_size,
                        output_dim=100,
                        mask_zero=True,
                        input_length=None,
                        embeddings_initializer=truncatednormal)
LSTM_encoder = LSTM(512,
                    return_sequences=True,
                    return_state=True,
                    kernel_initializer='lecun_uniform',
                    name='encoder_lstm'
                    )
LSTM_decoder = LSTM(512,
                    return_sequences=True,
                    return_state=True,
                    kernel_initializer='lecun_uniform',
                    name='decoder_lstm'
                    )

# encoder input and decoder input
input_question = Input(shape=(None, ), dtype='int32', name='input_question')
input_answer = Input(shape=(None, ), dtype='int32', name='input_answer')
input_question_embed = embed_layer(input_question)
input_answer_embed = embed_layer(input_answer)

encoder_lstm, question_h, question_c = LSTM_encoder(input_question_embed)
decoder_lstm, _, _ = LSTM_decoder(input_answer_embed,
                                  initial_state=[question_h, question_c])

attention = dot([decoder_lstm, encoder_lstm], axes=[2, 2])
attention = Activation('softmax')(attention)
context = dot([attention, encoder_lstm], axes=[2, 1])
decoder_combined_context = concatenate([context, decoder_lstm])
# Has another weight + tanh layer as described in equation (5) of the paper
decoder_dense1 = TimeDistributed(Dense(256, activation="tanh"))
decoder_dense2 = TimeDistributed(Dense(vocab_size, activation="softmax"))
output = decoder_dense1(decoder_combined_context)  # equation (5) of the paper
output = decoder_dense2(output)  # equation (6) of the paper
model = Model([input_question, input_answer], output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

model.load_weights('models/W--184-0.5949-.h5')  # load the best checkpoint for testing
model.summary()
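The checkpoint name is hard-coded here; if you trained for a different number of epochs, look under models/ and load the file you actually have. A small sketch (assuming the 'W--<epoch>-<loss>-.h5' naming produced by the ModelCheckpoint above) that picks the checkpoint with the lowest loss in its filename:

import os, re

candidates = [f for f in os.listdir('models/') if f.endswith('.h5')]
# extract the loss value embedded in each filename and take the smallest
best = min(candidates, key=lambda f: float(re.findall(r'-(\d+\.\d+)-\.h5$', f)[0]))
print(best)
# model.load_weights('models/' + best)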
(5) Output of step 17
(6) Step 18 code
question_model = Model(input_question, [encoder_lstm, question_h, question_c])
question_model.summary()
answer_h = Input(shape=(512,))
answer_c = Input(shape=(512,))
encoder_lstm = Input(shape=(maxLen,512))
target, h, c = LSTM_decoder(input_answer_embed, initial_state=[answer_h, answer_c])
attention = dot([target, encoder_lstm], axes=[2, 2])
attention_ = Activation('softmax')(attention)
context = dot([attention_, encoder_lstm], axes=[2,1])
decoder_combined_context = concatenate([context, target])
output = decoder_dense1(decoder_combined_context) # equation (5) of the paper
output = decoder_dense2(output) # equation (6) of the paper
answer_model = Model([input_answer, answer_h, answer_c, encoder_lstm], [output, h, c, attention_])
answer_model.summary()
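question_model encodes a question once, while answer_model generates one word at a time, taking the previous word, the current decoder states and the encoder outputs as input. A minimal sketch of a single decoding step (this is exactly what decode_greedy in the next step does in a loop):

import numpy as np

q = question[:1]                              # one padded question, shape (1, 20)
enc_out, h, c = question_model.predict(q)     # enc_out: (1, 20, 512); h, c: (1, 512)
step_in = np.array([[word_to_index['BOS']]])  # start decoding from the BOS token
probs, h, c, att = answer_model.predict([step_in, h, c, enc_out])
print(probs.shape)                            # (1, 1, vocab_size): distribution over the next word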
(7) Output of step 18
(8) Step 19 code
from keras.preprocessing import sequence
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import jieba
import requests

def act_weather(city):
    # TODO: Get weather by api
    url = 'http://wthrcdn.etouch.cn/weather_mini?city=' + city
    page = requests.get(url)
    data = page.json()
    temperature = data['data']['wendu']
    notice = data['data']['ganmao']
    outstrs = "地点: %s\n气温: %s\n注意: %s" % (city, temperature, notice)
    return outstrs + ' EOS'

def input_question(seq):
    seq = jieba.lcut(seq.strip(), cut_all=False)
    sentence = seq
    try:
        seq = np.array([word_to_index[w] for w in seq])
    except KeyError:
        seq = np.array([36874, 165, 14625])
    seq = sequence.pad_sequences([seq], maxlen=maxLen,
                                 padding='post', truncating='post')
    # print(seq)
    return seq, sentence

def decode_greedy(seq, sentence):
    question = seq
    for index in question[0]:
        if int(index) == 5900:
            for index_ in question[0]:
                if index_ in [7851, 11842, 2406, 3485, 823, 12773, 8078]:
                    return act_weather(index_to_word[index_])
    answer = np.zeros((1, 1))
    attention_plot = np.zeros((20, 20))
    answer[0, 0] = word_to_index['BOS']
    i = 1
    answer_ = []
    flag = 0
    encoder_lstm_, question_h, question_c = question_model.predict(x=question, verbose=1)
    # print(question_h, '\n')
    while flag != 1:
        prediction, prediction_h, prediction_c, attention = answer_model.predict([
            answer, question_h, question_c, encoder_lstm_
        ])
        attention_weights = attention.reshape(-1, )
        attention_plot[i] = attention_weights
        word_arg = np.argmax(prediction[0, -1, :])
        answer_.append(index_to_word[word_arg])
        if word_arg == word_to_index['EOS'] or i > 20:
            flag = 1
        answer = np.zeros((1, 1))
        answer[0, 0] = word_arg
        question_h = prediction_h
        question_c = prediction_c
        i += 1
    result = ' '.join(answer_)
    attention_plot = attention_plot[:len(result.split(' ')), :len(sentence)]
    # plot_attention(attention_plot, sentence, result.split(' '))
    return ' '.join(answer_)

def decode_beamsearch(seq, beam_size):
    question = seq
    encoder_lstm_, question_h, question_c = question_model.predict(x=question, verbose=1)
    sequences = [[[word_to_index['BOS']], 1.0, question_h, question_c]]
    answer = np.zeros((1, 1))
    answer[0, 0] = word_to_index['BOS']
    answer_ = ''
    flag = 0
    last_words = [word_to_index['BOS']]
    for i in range(maxLen):
        all_candidates = []
        for j in range(len(sequences)):
            s, score, h, c = sequences[j]
            last_word = s[-1]
            if not isinstance(last_word, int):
                last_word = last_word[-1]
            answer[0, 0] = last_word
            output, h, c, _ = answer_model.predict([answer, h, c, encoder_lstm_])
            output = output[0, -1]
            for k in range(len(output)):
                candidate = [seq + [k], score * -np.log(output[k]), h, c]
                all_candidates.append(candidate)
        ordered = sorted(all_candidates, key=lambda tup: tup[1])
        sequences = ordered[:beam_size]
    answer_ = sequences[0][0]
    print(answer_[0])
    answer_ = [index_to_word[x] for x in answer_[0] if (x != 0)]
    answer_ = ' '.join(answer_)
    return answer_

def plot_attention(attention, sentence, predicted_sentence):
    zhfont = matplotlib.font_manager.FontProperties(fname='simkai.ttf')
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(1, 1, 1)
    attention = [x[::-1] for x in attention]
    ax.matshow(attention, cmap='viridis')
    fontdict = {'fontsize': 20}
    ax.set_xticklabels([''] + sentence, fontdict=fontdict, fontproperties=zhfont)
    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict, fontproperties=zhfont)
    # ax.yaxis.set_ticks_position('right')  # put the y-axis ticks on the right
    plt.show()
(9) Output of step 19
(10) Step 20 code
while True:
    seq = input('A:')
    if seq == 'x':
        break
    seq, sentence = input_question(seq)
    # print(sentence)
    answer = decode_greedy(seq, sentence)
    answer = ''.join(answer.split(' ')[:-1])
    print('B: ', answer)
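If you prefer to call the bot programmatically instead of through the interactive loop, a single turn looks like this (same pipeline as above; the trailing EOS token is stripped from the reply):

seq, sentence = input_question('你好')
reply = decode_greedy(seq, sentence)
print(''.join(reply.split(' ')[:-1]))   # drop the trailing EOS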
(11) Output of step 20
Here you can keep typing questions and chatting with the bot (enter 'x' to exit the loop). This completes the chatbot lab.
III. Reflections
This lab shows how capable Huawei Cloud's AI tooling is. I do not know much about AI models, yet I could still complete the whole case by following the steps one by one, and building a Chinese chatbot did not require deep AI expertise. It is genuinely interesting and engaging, and ModelArts really does work as a zero-barrier AI development platform.
The [Huawei Cloud AI New Year Celebration] prize essay campaign is in full swing: https://bbs.huaweicloud.cn/blogs/325842