First, let's look at all the code so far, bundled together in one place.
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import os
os.environ['KAGGLE_USERNAME'] = ''  # your Kaggle username
os.environ['KAGGLE_KEY'] = ''       # your Kaggle API key
!kaggle competitions download -c digit-recognizer
!unzip -q test.csv.zip
!unzip -q train.csv.zip
train=pd.read_csv('train.csv')
test= pd.read_csv('test.csv')
Label = train['label'].values
Train = train.drop(columns=['label']).values/255
Test = test.values/255
N, size_input = Train.shape
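Before moving on to the functions, a quick sanity check on what was just loaded is cheap insurance. This is my own aside, not part of the original code; it only uses the variables defined above.

# Optional sanity check (not in the original post).
# For the digit-recognizer data, Train should be (42000, 784) and Test (28000, 784).
print(Train.shape, Test.shape)
print(Train.min(), Train.max())   # 0.0 and 1.0 after dividing by 255
print(np.bincount(Label))         # roughly 4000+ examples for each digit 0-9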
def ReLU(x):
    return np.maximum(0, x)

def ReLU_derivative(x):
    return x > 0

def cross_entropy(pred, Label):
    true = np.eye(num_class)[Label]
    loss = (np.log(pred) * true).sum(axis=1)
    return -np.sum(loss)
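One small aside of mine (not from the original post): ReLU_derivative returns a boolean array, but NumPy treats True/False as 1/0 in arithmetic, which is exactly the masking behavior the backprop step further down relies on.

# Quick demo: the boolean mask from ReLU_derivative behaves like 0/1 in arithmetic.
z = np.array([[-1.0, 0.0, 2.0]])
print(ReLU(z))                               # [[0. 0. 2.]]
print(ReLU_derivative(z))                    # [[False False  True]]
print(np.ones_like(z) * ReLU_derivative(z))  # [[0. 0. 1.]]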
num_class=10
num_iter = 5
num_Hidden = 256
np.random.seed(1127)  # fixed seed so the experiment is reproducible
# W[0] = Matrix_input_to_Hidden_layer, W[1] = Matrix_Hidden_to_output_layer
W = [1e-1*np.random.randn(size_input, num_Hidden), 1e-1*np.random.randn(num_Hidden, num_class)]
b = [np.zeros(num_Hidden)]  # hidden-layer bias, used as bias[0] by feed/backprop below
From here on, the feed function changes a bit.
Remember that, while working through the derivatives, there were intermediate variables that made the expressions simple? We now pull those out and return them separately.
def feed(Input, Matrix, bias):
    Hidden = np.dot(Input, Matrix[0]) + bias[0]
    z_1 = Hidden
    Activation = ReLU(z_1)
    z_2 = np.dot(Activation, Matrix[1])
    # Softmax part
    s = np.exp(z_2)
    total = np.sum(s, axis=1).reshape(-1, 1)
    Output = s / total
    return Output, Activation, z_1
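One caveat about the softmax part: np.exp(z_2) can overflow to inf if the logits grow large. It doesn't bite at this scale, but a common remedy (my addition, not something the original code does) is to subtract the row-wise maximum before exponentiating, which is mathematically equivalent.

# A numerically safer softmax, sketched as a possible drop-in for the block above.
# Subtracting the row-wise max leaves the result unchanged, because the exp(-max)
# factor cancels between the numerator and the denominator.
def softmax(z):
    shifted = z - np.max(z, axis=1, keepdims=True)
    s = np.exp(shifted)
    return s / np.sum(s, axis=1, keepdims=True)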
This is the newly added part!
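For reference, in the notation of the code below (X is the input batch, W_1 and W_2 are Matrix[0] and Matrix[1], a_1 is the hidden activation, and the one-hot targets are true), the gradients that backprop computes are

$$\delta_{H2O} = \mathrm{Output} - \mathrm{true}, \qquad \frac{\partial\,\mathrm{cost}}{\partial W_2} = a_1^{\top}\,\delta_{H2O}$$

$$\delta_{I2H} = \delta_{H2O}\,W_2^{\top} \odot \mathrm{ReLU}'(z_1), \qquad \frac{\partial\,\mathrm{cost}}{\partial W_1} = X^{\top}\,\delta_{I2H}$$

and the code additionally divides by N to average over the training set.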
def backprop(Matrix, bias, Input, Label):
    # step 1: forward pass
    Output, Activate_1, z_1 = feed(Input, Matrix, bias)
    # step 2: Output-layer derivative (Hidden -> Output weights)
    true = np.eye(num_class)[Label]
    # delta_H2O = \partial cost / \partial z_2; combining it with
    # \partial z_2 / \partial Matrix[1] (= Activate_1) gives grad_H2O
    delta_H2O = Output - true
    grad_H2O = np.dot(Activate_1.T, delta_H2O)
    # step 3: Hidden-layer derivative (Input -> Hidden weights)
    delta_I2H = np.dot(delta_H2O, Matrix[1].T) * ReLU_derivative(z_1)
    grad_I2H = np.dot(Input.T, delta_I2H)
    dMatrix = [grad_I2H/N, grad_H2O/N]
    dbias = [np.mean(delta_I2H, axis=0)]
    return dMatrix, dbias
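If you want to convince yourself these formulas are right, a finite-difference check is a cheap way to do it. The sketch below is my addition (not in the original post); it assumes the functions and variables defined above, and it accounts for the fact that backprop divides the summed gradient by the global N.

# Finite-difference gradient check (a sketch, not part of the original post).
# backprop returns the summed gradient divided by the global N, so the numeric
# estimate of the summed-loss derivative should match dW[0][i, j] * N.
eps = 1e-5
x_small, y_small = Train[:32], Label[:32]      # small batch keeps this cheap
dW, db = backprop(W, b, x_small, y_small)

for _ in range(3):
    i, j = np.random.randint(size_input), np.random.randint(num_Hidden)
    W[0][i, j] += eps
    loss_plus = cross_entropy(feed(x_small, W, b)[0], y_small)
    W[0][i, j] -= 2 * eps
    loss_minus = cross_entropy(feed(x_small, W, b)[0], y_small)
    W[0][i, j] += eps                           # restore the original weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print("numeric:", numeric, " backprop:", dW[0][i, j] * N)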
Here is where we actually apply it:
def update(W, b, Train, Label):
    dW, db = backprop(W, b, Train, Label)
    W[0] -= 0.1 * dW[0]
    W[1] -= 0.1 * dW[1]
    b[0] -= 0.1 * db[0]  # hidden-layer bias update (backprop already returns it)
And here we print the progress so it's easy to follow:
%%time
for i in range(num_iter*10):
    update(W, b, Train, Label)
    if i % (num_iter/5) == 0:
        # sanity check 1
        prediction, _, _ = feed(Train, W, b)
        print("Cross-entropy loss after", i+1, "iterations is {:.8}".format(cross_entropy(prediction, Label)))
        print("Training accuracy after", i+1, "iterations is {:.4%}\n".format(np.mean(np.argmax(prediction, axis=1) == Label)))

Final_after_backpropa, _, _ = feed(Train, W, b)
print("Final cross-entropy loss is {:.8}".format(cross_entropy(Final_after_backpropa, Label)))
print("Final training accuracy is {:.4%}".format(np.mean(np.argmax(Final_after_backpropa, axis=1) == Label)))
Let's eyeball a few images to check whether the predictions are right or wrong!
# eye check on the training set
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
axes = axes.flatten()
idx = np.random.randint(0, N, size=10)
for i in range(10):
    axes[i].imshow(Train[idx[i], :].reshape(28, 28), cmap='gray')
    axes[i].axis('off')  # hide the axes ticks
    axes[i].set_title(str(int(Label[idx[i]])), color='black', loc='left', fontsize=15)
    axes[i].set_title("<- real vs pred ->", color='black', fontsize=10)
    axes[i].set_title(str(int(np.argmax(Final_after_backpropa, axis=1)[idx[i]])), color='black', loc='right', fontsize=15)
plt.show()
Oh yeah, it looks like every image shown here was classified correctly.
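Random samples will mostly be correct at this accuracy, so it can be more telling to look only at the mistakes. Here is a variant of the same plotting code (my addition, not from the original post):

# Eye check restricted to misclassified training examples (my addition).
pred_labels = np.argmax(Final_after_backpropa, axis=1)
wrong = np.where(pred_labels != Label)[0]
print(len(wrong), "training images are still misclassified")

if len(wrong) > 0:
    fig, axes = plt.subplots(2, 5, figsize=(12, 5))
    axes = axes.flatten()
    idx = np.random.choice(wrong, size=min(10, len(wrong)), replace=False)
    for i in range(len(idx)):
        axes[i].imshow(Train[idx[i], :].reshape(28, 28), cmap='gray')
        axes[i].set_title("real {} / pred {}".format(Label[idx[i]], pred_labels[idx[i]]), fontsize=12)
    for ax in axes:
        ax.axis('off')
    plt.show()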