We will implement a Recurrent Neural Network (RNN) using PyTorch. The implementation proceeds as follows:
1. Data preprocessing
2. Model setup
3. Model training
4. Training results

1. Data Preprocessing
In:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
string = "To climb steep hills requires a slow pace at first."
chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ?!.,:;'01"
char_list = [i for i in chars]
n_letter = len(chars)
def string_to_onehot(string):
start = np.zeros(shape = len(char_list), dtype = int)
end = np.zeros(shape = len(char_list), dtype = int)
start[-2] = 1
end[-1] = 1
for i in string:
idx = char_list.index(i)
zero = np.zeros(shape = n_letter, dtype = int)
zero[idx] = 1
start = np.vstack([start, zero])
output = np.vstack([start, end])
return(output)
def onehot_to_word(onehot):
onehot_ = torch.Tensor.numpy(onehot)
return char_list[onehot_.argmax()]
▷ The training sentence is "To climb steep hills requires a slow pace at first."; the RNN is trained on it and then asked to reproduce it.
▷ One-hot encoding is applied so that the model can consume the sentence. char_list enumerates every character that may appear and fixes the position that is set to 1 (all others 0) for each character. For example, the letter B is the second entry of char_list, so its one-hot encoding is [0, 1, ..., 0].
▷ string_to_onehot takes a sentence and returns its one-hot encoded form, with the start and end token rows attached; a quick sanity check is sketched below this list.
▷ onehot_to_word maps a one-hot encoded vector back to the character it represents.
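As a minimal sanity check (an addition, assuming the cell above has been run), encoding a two-character string should yield the two characters plus the two token rows, and decoding a row should recover its character:

onehot = string_to_onehot("Ab")
print(onehot.shape)  # (4, 62): start token, 'A', 'b', end token
print(onehot_to_word(torch.from_numpy(onehot[1]).float()))  # 'A'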
2. Model Setup
In:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.ih = nn.Linear(input_size, hidden_size)   # input-to-hidden
        self.hh = nn.Linear(hidden_size, hidden_size)  # hidden-to-hidden
        self.io = nn.Linear(hidden_size, output_size)  # hidden-to-output
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        # h_t = tanh(W_ih x_t + W_hh h_{t-1}); the output is a linear
        # readout of the updated hidden state.
        hidden = self.act_fn(self.ih(input) + self.hh(hidden))
        output = self.io(hidden)
        return output, hidden

    def init_hidden(self):
        # Zero hidden state used at the start of each sequence.
        return torch.zeros(1, self.hidden_size)
epochs = 1000
n_hidden = 50
learning_rate = 0.01
rnn = RNN(n_letter, n_hidden, n_letter)
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr = learning_rate)
▷ The RNN consists of an input, a hidden, and an output layer. Values passed between layers are transformed by three sets of weights: input-to-hidden, hidden-to-hidden, and hidden-to-output. The __init__ method of the RNN class implements these as three nn.Linear modules.
▷ The activation function is the hyperbolic tangent.
▷ The init_hidden method creates the initial hidden state.
▷ The number of epochs, the hidden layer size, and the learning rate are set through epochs, n_hidden, and learning_rate, and the RNN class is instantiated to build the network; a shape check of one recurrent step is sketched below this list.
▷ MSE is used as the loss function and Adam as the optimization algorithm.
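A quick sketch (an addition, assuming the cells above have been run) of a single recurrent step, confirming the shapes that flow through the network:

x = torch.zeros(1, n_letter)  # one dummy one-hot character, batch size 1
x[0, 0] = 1                   # pretend the character is 'A'
h = rnn.init_hidden()         # (1, 50) zero hidden state
out, h = rnn(x, h)
print(out.shape, h.shape)     # torch.Size([1, 62]) torch.Size([1, 50])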
3. Model Training
In:
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())
for i in range(epochs):
    rnn.zero_grad()
    total_loss = 0
    hidden = rnn.init_hidden()
    # Feed the sentence one character at a time; the target at each
    # step is the next character.
    for j in range(one_hot.size()[0] - 1):
        input_char = one_hot[j:j+1, :]
        target = one_hot[j+1]
        output, hidden = rnn.forward(input_char, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss
    total_loss.backward()
    optimizer.step()
    if i % 10 == 0:
        print(total_loss)
Out:
tensor(1.6622, grad_fn=<AddBackward0>)
tensor(0.5862, grad_fn=<AddBackward0>)
tensor(0.3629, grad_fn=<AddBackward0>)
tensor(0.2178, grad_fn=<AddBackward0>)
tensor(0.1408, grad_fn=<AddBackward0>)
tensor(0.0931, grad_fn=<AddBackward0>)
tensor(0.0806, grad_fn=<AddBackward0>)
tensor(0.0560, grad_fn=<AddBackward0>)
tensor(0.0423, grad_fn=<AddBackward0>)
tensor(0.0358, grad_fn=<AddBackward0>)
tensor(0.0256, grad_fn=<AddBackward0>)
tensor(0.0201, grad_fn=<AddBackward0>)
tensor(0.0204, grad_fn=<AddBackward0>)
tensor(0.0149, grad_fn=<AddBackward0>)
tensor(0.0115, grad_fn=<AddBackward0>)
tensor(0.0093, grad_fn=<AddBackward0>)
tensor(0.0079, grad_fn=<AddBackward0>)
tensor(0.0092, grad_fn=<AddBackward0>)
tensor(0.0072, grad_fn=<AddBackward0>)
tensor(0.0058, grad_fn=<AddBackward0>)
tensor(0.0082, grad_fn=<AddBackward0>)
tensor(0.0051, grad_fn=<AddBackward0>)
tensor(0.0043, grad_fn=<AddBackward0>)
tensor(0.0035, grad_fn=<AddBackward0>)
tensor(0.0031, grad_fn=<AddBackward0>)
tensor(0.0027, grad_fn=<AddBackward0>)
tensor(0.0033, grad_fn=<AddBackward0>)
tensor(0.0053, grad_fn=<AddBackward0>)
tensor(0.0027, grad_fn=<AddBackward0>)
tensor(0.0022, grad_fn=<AddBackward0>)
tensor(0.0019, grad_fn=<AddBackward0>)
tensor(0.0017, grad_fn=<AddBackward0>)
tensor(0.0015, grad_fn=<AddBackward0>)
tensor(0.0014, grad_fn=<AddBackward0>)
tensor(0.0013, grad_fn=<AddBackward0>)
tensor(0.0016, grad_fn=<AddBackward0>)
tensor(0.0041, grad_fn=<AddBackward0>)
tensor(0.0020, grad_fn=<AddBackward0>)
tensor(0.0014, grad_fn=<AddBackward0>)
tensor(0.0011, grad_fn=<AddBackward0>)
tensor(0.0010, grad_fn=<AddBackward0>)
tensor(0.0009, grad_fn=<AddBackward0>)
tensor(0.0008, grad_fn=<AddBackward0>)
tensor(0.0008, grad_fn=<AddBackward0>)
tensor(0.0007, grad_fn=<AddBackward0>)
tensor(0.0025, grad_fn=<AddBackward0>)
tensor(0.0020, grad_fn=<AddBackward0>)
tensor(0.0012, grad_fn=<AddBackward0>)
tensor(0.0013, grad_fn=<AddBackward0>)
tensor(0.0007, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0011, grad_fn=<AddBackward0>)
tensor(0.0013, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0029, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0022, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0024, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(9.7875e-05, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0025, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(9.9047e-05, grad_fn=<AddBackward0>)
tensor(8.2265e-05, grad_fn=<AddBackward0>)
tensor(7.6619e-05, grad_fn=<AddBackward0>)
▷ one_hot is the training sentence converted to its one-hot encoding and then cast to a float tensor so the model can be trained on it.
▷ The code under for i in range(epochs) trains the model and prints the accumulated loss every 10 epochs.
▷ Because the training sentence is short, the MSE can be seen converging to 0; a common alternative loss is sketched below this list.
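As an aside (not part of the original post), next-character prediction is more often trained with cross-entropy over class indices than with MSE on one-hot targets; the names ce_loss, total_ce, and target_idx below are hypothetical. A minimal sketch of the inner loop under that variation:

ce_loss = nn.CrossEntropyLoss()
hidden = rnn.init_hidden()
total_ce = 0
for j in range(one_hot.size()[0] - 1):
    input_char = one_hot[j:j+1, :]
    target_idx = one_hot[j+1].argmax().unsqueeze(0)  # index of the next char
    output, hidden = rnn(input_char, hidden)
    total_ce += ce_loss(output, target_idx)          # output: (1, 62) logits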
4. Training Results
In:
start = torch.zeros(1, len(char_list))
start[:, -2] = 1  # one-hot start-of-sentence token

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_char = start
    output_string = ""
    # Generate one character at a time, feeding each output back in
    # as the next input.
    for i in range(len(string)):
        output, hidden = rnn.forward(input_char, hidden)
        output_string += onehot_to_word(output.data)
        input_char = output
print(output_string)
Out:
To climb steep hills ples ereerereareirii1i pilim
▷ To inspect the model's output, the one-hot start token [0, ..., 1, 0] is given as the first input.
▷ rnn.init_hidden creates the initial hidden state, and the input and hidden state are passed to rnn.forward to obtain an output and a new hidden state. These are in turn fed back into rnn.forward to produce the next output, and the process is repeated for the length of the original training sentence.
▷ For such a simple model the result is not fully satisfying, but it roughly resembles the original sentence; one possible refinement is sketched below.
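One plausible refinement (an assumption, not something the post does): instead of feeding the raw output scores back in, snap each prediction to the one-hot vector of its argmax, so the model always receives inputs of the same form it saw during training:

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_char = start
    output_string = ""
    for i in range(len(string)):
        output, hidden = rnn(input_char, hidden)
        output_string += onehot_to_word(output.data)
        # Re-encode the prediction as a clean one-hot vector.
        input_char = torch.zeros(1, n_letter)
        input_char[0, output.argmax()] = 1
print(output_string)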
Reference:
Gunho Choi, 파이토치 첫걸음 (First Steps with PyTorch), Hanbit Media, 2019