
RNN & LSTM Forward Propagation Code

RNN


The code below is in two parts: a vanilla RNN, then an LSTM.


import numpy as np

def softmax(x):
    e_x = np.exp(x - np.max(x))  # subtract the max first for numerical stability
    return e_x / e_x.sum(axis=0)

def rnn_cell_forward(xt, a_prev, parameters):
    """
    :param xt: input at time step t, of shape (n_x, m)
    :param a_prev: hidden state at time step t-1, of shape (n_a, m)
    :return:
        at -- next hidden state, of shape (n_a, m)
        yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
        cache -- tuple of values needed for the backward pass, contains (at, a_prev, xt, parameters)
    """
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    at = np.tanh(np.dot(Wax, xt) + np.dot(Waa, a_prev) + ba)
    yt_pred = softmax(np.dot(Wya, at) + by)

    # values needed for the backward pass
    cache = (at, a_prev, xt, parameters)

    return at, yt_pred, cache
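
In equation form, the single cell above computes (this just restates the two lines of code):

    at = tanh(Wax·xt + Waa·a_prev + ba)
    yt_pred = softmax(Wya·at + by)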

def rnn_forward(x, a0, parameters):
    """
    :param x: inputs for every time step, of shape (n_x, m, T_x); m is the number of examples, T_x the sequence length
    :param a0: initial hidden state, of shape (n_a, m)
    :param parameters: dictionary of weights and biases
    :return: a -- hidden states for every time step, of shape (n_a, m, T_x)
             y_preds -- predictions for every time step, of shape (n_y, m, T_x)
             caches -- values needed for the backward pass
    """
    caches = []

    n_x, m, T_x = x.shape
    n_y, n_a = parameters["Wya"].shape  # n_a is the number of hidden units in the RNN layer

    a = np.zeros([n_a, m, T_x])
    y_preds = np.zeros([n_y, m, T_x])

    a_next = a0

    for t in range(T_x):
        at, y_pred, cache = rnn_cell_forward(x[:, :, t], a_next, parameters)
        a_next = at  # carry the hidden state forward to the next time step

        # save the hidden state and prediction for every time step
        a[:, :, t] = at
        y_preds[:, :, t] = y_pred

        caches.append(cache)

    caches = (caches, x)

    return a, y_preds, caches


np.random.seed(1)
x = np.random.randn(3,10,4)  # (n_x, m, T_x): n_x features per sample, m examples, T_x time steps
a0 = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward(x, a0, parameters)
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("len(caches) = ", len(caches))

LSTM


def sigmoid(x):
    # needed by the gate computations below
    return 1 / (1 + np.exp(-x))

def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    Wf = parameters["Wf"]  # forget gate weights
    bf = parameters["bf"]
    Wi = parameters["Wi"]  # input (update) gate weights
    bi = parameters["bi"]
    Wc = parameters["Wc"]  # candidate cell state weights
    bc = parameters["bc"]
    Wo = parameters["Wo"]  # output gate weights
    bo = parameters["bo"]
    Wy = parameters["Wy"]  # prediction weights
    by = parameters["by"]

    n_x, m = xt.shape
    n_y, n_a = Wy.shape

    # stack a_prev on top of xt
    concat = np.zeros([n_a + n_x, m])
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    ft = sigmoid(np.dot(Wf, concat) + bf)   # forget gate
    it = sigmoid(np.dot(Wi, concat) + bi)   # input gate
    cct = np.tanh(np.dot(Wc, concat) + bc)  # candidate cell state
    ct = ft * c_prev + it * cct             # for ndarrays, * is the Hadamard (elementwise) product
    ot = sigmoid(np.dot(Wo, concat) + bo)   # output gate
    at = ot * np.tanh(ct)

    y_pred = softmax(np.dot(Wy, at) + by)

    cache = (at, ct, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

    return at, ct, y_pred, cache
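
For reference, restating the gate computations above in equation form (⊙ is the elementwise product, [a_prev; xt] is the stacked vector `concat`):

    ft  = sigmoid(Wf·[a_prev; xt] + bf)   # forget gate
    it  = sigmoid(Wi·[a_prev; xt] + bi)   # input gate
    cct = tanh(Wc·[a_prev; xt] + bc)      # candidate cell state
    ct  = ft ⊙ c_prev + it ⊙ cct
    ot  = sigmoid(Wo·[a_prev; xt] + bo)   # output gate
    at  = ot ⊙ tanh(ct)
    y_pred = softmax(Wy·at + by)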

def lstm_forward(x, a0, parameters):
    """
    :param x: inputs for every time step, of shape (n_x, m, T_x); m is the number of examples, T_x the sequence length
    :param a0: initial hidden state, of shape (n_a, m)
    :param parameters: dictionary of weights and biases
    :return: a -- hidden states for every time step
             y -- predictions for every time step
             c -- cell states for every time step
             caches
    """
    caches = []

    n_x, m, T_x = x.shape
    n_y, n_a = parameters["Wy"].shape
    a = np.zeros([n_a, m, T_x])
    c = np.zeros([n_a, m, T_x])
    y = np.zeros([n_y, m, T_x])

    a_next = a0
    c_next = np.zeros([n_a, m])  # the initial cell state is all zeros

    for t in range(T_x):
        at, ct, yt, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)
        a_next = at  # carry the hidden state and cell state forward
        c_next = ct

        a[:, :, t] = at
        y[:, :, t] = yt
        c[:, :, t] = ct

        caches.append(cache)

    caches = (caches, x)

    return a, y, c, caches


np.random.seed(1)
x = np.random.randn(3,10,7)   # (n_x, m, T_x)
a0 = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)  # gate weights have shape (n_a, n_a + n_x)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}

a, y, c, caches = lstm_forward(x, a0, parameters)
print("a[4][3][6] = ", a[4][3][6])

Dinosaur Island


A character-level language model that generates dinosaur names: each y_pred is a softmax distribution over characters, and the next character is sampled from it.

Each line of the dataset (one name) is a training example. The input at each time step is a single character encoded as a one-hot vector (size 27: the 26 letters plus '\n' as the end-of-name token).

For example, if the name is TOM, one character is fed per time step: the inputs are 0 (a zero vector at the first step), T, O, M, and the labels are T, O, M, \n; a sketch of this construction follows below.
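
A minimal sketch of building one (input, label) pair under the scheme just described; char_to_ix is a hypothetical character-to-index table ('\n' -> 0, 'a' -> 1, ..., 'z' -> 26), not something defined in this post:

char_to_ix = {ch: i for i, ch in enumerate(['\n'] + [chr(ord('a') + k) for k in range(26)])}

name = "tom"
X = [None] + [char_to_ix[ch] for ch in name]              # None stands for the zero vector at step 0
Y = [char_to_ix[ch] for ch in name] + [char_to_ix['\n']]  # labels = inputs shifted left by one step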

Each input character is one-hot encoded, one character is fed per time step, and y_pred is computed.

The cross-entropy loss between y_pred and the true label is computed, then backpropagated.
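
A rough sketch of the sampling loop, reusing rnn_cell_forward from above; the parameter shapes (Wax of shape (n_a, 27), Wya of shape (27, n_a) for one-hot characters), the n_a value, and the char_to_ix table from the previous sketch are assumptions, not this post's actual training setup:

def sample_name(parameters, char_to_ix, n_a=50, vocab_size=27, max_len=50):
    ix_to_char = {i: ch for ch, i in char_to_ix.items()}
    x = np.zeros((vocab_size, 1))      # first input: the zero vector (the "0" above)
    a_prev = np.zeros((n_a, 1))
    name = ""
    for _ in range(max_len):
        a_prev, y_pred, _ = rnn_cell_forward(x, a_prev, parameters)
        # sample the next character index from the softmax probabilities
        idx = np.random.choice(vocab_size, p=y_pred.ravel())
        if ix_to_char[idx] == '\n':    # '\n' ends the name
            break
        name += ix_to_char[idx]
        x = np.zeros((vocab_size, 1))  # feed the sampled character back in as a one-hot vector
        x[idx] = 1
    return name

# per-step cross-entropy against the true label index: loss_t = -np.log(y_pred[true_idx, 0])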
