梯度下降实验

# 前置数据

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors
from matplotlib import animation, rc
from IPython.display import HTML
import numpy as np
import math
# Visualization options: number of animation frames and the (w, b) plot window.
animation_frames = 50
w_min, w_max = -10, 10
b_min, b_max = -10, 10

# Training data: inputs X and targets Y, paired element by element.
X = np.asarray([3.5, 0.35, 3.2, -2.0, 1.5, -0.5])
Y = np.asarray([0.5, 0.50, 0.5, 0.5, 0.1, 0.3])

# Optimizer settings: name selects the update rule, epoch is the number of
# passes over the data, lr is the learning rate.
SGD_optimizer = dict(name='SGD', epoch=10000, lr=0.1)

# Settings for the remaining optimizers.
MiniBatch_optimizer = dict(name='MiniBatch', epoch=10000, lr=1)
Momentum_optimizer = dict(name='Momentum', epoch=10000, lr=1)
NAG_optimizer = dict(name='NAG', epoch=10000, lr=1)
AdaGrad_optimizer = dict(name='AdaGrad', epoch=10000, lr=0.1)
RMSProp_optimizer = dict(name='RMSProp', epoch=10000, lr=0.1)
Adam_optimizer = dict(name='Adam', epoch=10000, lr=0.1)

# 优化器代码

# Optimizer test bench: every update rule below is implemented; SGD was the
# example supplied with the exercise.
class Tester:
    """Fit a single sigmoid unit ``sigmoid(w*x + b)`` with a selectable optimizer.

    ``optimizer`` is a dict with the keys ``'name'`` (one of ``'SGD'``,
    ``'MiniBatch'``, ``'Momentum'``, ``'NAG'``, ``'AdaGrad'``, ``'RMSProp'``,
    ``'Adam'``), ``'epoch'`` (number of passes over the data) and ``'lr'``
    (learning rate). After :meth:`fit`, ``w_h``, ``b_h`` and ``e_h`` hold one
    entry per epoch: the weight, the bias and the total loss.
    """

    def __init__(self, w_init=-6.0, b_init=4.0, optimizer=SGD_optimizer):
        self.w = w_init
        self.b = b_init
        # Per-epoch histories, filled in by fit() for later visualization.
        self.w_h = []
        self.b_h = []
        self.e_h = []
        self.optimizer = optimizer

    # Activation function
    def sigmoid(self, x, w=None, b=None):
        """Return ``1 / (1 + exp(-(w*x + b)))``; ``w``/``b`` default to the current parameters."""
        if w is None:
            w = self.w
        if b is None:
            b = self.b
        return 1. / (1. + np.exp(-(w*x + b)))

    # Error computation
    def loss(self, X, Y, w=None, b=None):
        """Return the sum over all samples of ``0.5 * (sigmoid(x) - y) ** 2``.

        ``w`` and ``b`` default to the current parameters. They may also be
        arrays (for example a meshgrid of candidate values), in which case the
        result has their broadcast shape.
        """
        if w is None:
            w = self.w
        if b is None:
            b = self.b
        cost = 0
        for x, y in zip(X, Y):
            cost += 0.5 * (self.sigmoid(x, w, b) - y) ** 2
        return cost

    def grad_w(self, x, y, w=None, b=None):
        """Return the derivative of one sample's squared error with respect to ``w``."""
        if w is None:
            w = self.w
        if b is None:
            b = self.b
        y_pred = self.sigmoid(x, w, b)
        return (y_pred - y) * y_pred * (1 - y_pred) * x

    def grad_b(self, x, y, w=None, b=None):
        """Return the derivative of one sample's squared error with respect to ``b``."""
        if w is None:
            w = self.w
        if b is None:
            b = self.b
        y_pred = self.sigmoid(x, w, b)
        return (y_pred - y) * y_pred * (1 - y_pred)

    def fit(self, X, Y):
        """Train ``w`` and ``b`` on ``(X, Y)`` with the configured optimizer.

        Clears the histories, stores the data on the instance, then runs
        ``optimizer['epoch']`` epochs, recording ``w``, ``b`` and the total
        loss after each one.

        Raises:
            ValueError: if ``optimizer['name']`` is not a supported optimizer.
        """
        runners = {
            'SGD': self._fit_sgd,
            'MiniBatch': self._fit_minibatch,
            'Momentum': self._fit_momentum,
            'NAG': self._fit_nag,
            'AdaGrad': self._fit_adagrad,
            'RMSProp': self._fit_rmsprop,
            'Adam': self._fit_adam,
        }
        name = self.optimizer['name']
        if name not in runners:
            # Fail loudly instead of returning with empty histories.
            raise ValueError(
                'Unknown optimizer {!r}; expected one of {}'.format(
                    name, sorted(runners)))

        self.w_h = []
        self.b_h = []
        self.e_h = []
        self.X = X
        self.Y = Y
        runners[name](X, Y, self.optimizer['epoch'], self.optimizer['lr'])

    def _batch_gradient(self, X, Y, w=None, b=None):
        """Return ``(dw, db)`` summed over all samples, evaluated at ``(w, b)``."""
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += self.grad_w(x, y, w, b)
            db += self.grad_b(x, y, w, b)
        return dw, db

    def _fit_sgd(self, X, Y, epochs, lr):
        """Stochastic gradient descent: one parameter update per sample."""
        for _ in range(epochs):
            for x, y in zip(X, Y):
                # Use only the current sample's gradient; accumulating across
                # samples would re-apply earlier gradients on every step.
                dw = self.grad_w(x, y)
                db = self.grad_b(x, y)
                self.w -= lr * dw
                self.b -= lr * db
            # Record parameters and error for visualization.
            self.append_param_history()

    def _fit_minibatch(self, X, Y, epochs, lr, batch_size=2):
        """Mini-batch gradient descent: one update per ``batch_size`` samples."""
        for _ in range(epochs):
            dw, db, pending = 0, 0, 0
            for x, y in zip(X, Y):
                dw += self.grad_w(x, y)
                db += self.grad_b(x, y)
                pending += 1
                if pending == batch_size:
                    self.w -= lr * dw
                    self.b -= lr * db
                    dw, db, pending = 0, 0, 0
            if pending:
                # Apply the trailing partial batch rather than discarding it.
                self.w -= lr * dw
                self.b -= lr * db
            self.append_param_history()

    def _fit_momentum(self, X, Y, epochs, lr, gamma=0.9):
        """Full-batch gradient descent with momentum coefficient ``gamma``."""
        prev_v_w, prev_v_b = 0, 0
        for _ in range(epochs):
            dw, db = self._batch_gradient(X, Y)
            v_w = gamma*prev_v_w + lr*dw
            v_b = gamma*prev_v_b + lr*db
            self.w -= v_w
            self.b -= v_b
            prev_v_w, prev_v_b = v_w, v_b
            self.append_param_history()

    def _fit_nag(self, X, Y, epochs, lr, gamma=0.9):
        """Nesterov accelerated gradient: gradient taken at the look-ahead point."""
        prev_v_w, prev_v_b = 0, 0
        for _ in range(epochs):
            # Look ahead along the carried-over velocity before differentiating.
            ahead_w = self.w - gamma*prev_v_w
            ahead_b = self.b - gamma*prev_v_b
            dw, db = self._batch_gradient(X, Y, ahead_w, ahead_b)
            v_w = gamma*prev_v_w + lr*dw
            v_b = gamma*prev_v_b + lr*db
            self.w -= v_w
            self.b -= v_b
            prev_v_w, prev_v_b = v_w, v_b
            self.append_param_history()

    def _fit_adagrad(self, X, Y, epochs, lr, eps=1e-8):
        """AdaGrad: step scaled by the running sum of squared gradients."""
        v_w, v_b = 0, 0
        for _ in range(epochs):
            dw, db = self._batch_gradient(X, Y)
            v_w = v_w + dw**2
            v_b = v_b + db**2
            self.w = self.w - (lr/np.sqrt(v_w + eps))*dw
            self.b = self.b - (lr/np.sqrt(v_b + eps))*db
            self.append_param_history()

    def _fit_rmsprop(self, X, Y, epochs, lr, beta1=0.9, eps=1e-8):
        """RMSProp: step scaled by a decaying average of squared gradients."""
        v_w, v_b = 0, 0
        for _ in range(epochs):
            dw, db = self._batch_gradient(X, Y)
            v_w = beta1*v_w + (1 - beta1)*dw**2
            v_b = beta1*v_b + (1 - beta1)*db**2
            self.w -= (lr/np.sqrt(v_w + eps))*dw
            self.b -= (lr/np.sqrt(v_b + eps))*db
            self.append_param_history()

    def _fit_adam(self, X, Y, epochs, lr, beta1=0.9, beta2=0.999, eps=1e-8):
        """Adam: bias-corrected first and second moment estimates."""
        m_w, m_b, v_w, v_b = 0, 0, 0, 0
        for i in range(epochs):
            dw, db = self._batch_gradient(X, Y)
            m_w = beta1*m_w + (1 - beta1)*dw
            m_b = beta1*m_b + (1 - beta1)*db
            v_w = beta2*v_w + (1 - beta2)*dw**2
            v_b = beta2*v_b + (1 - beta2)*db**2
            # Bias correction uses the 1-based step count.
            m_w_hat = m_w/(1 - math.pow(beta1, i + 1))
            m_b_hat = m_b/(1 - math.pow(beta1, i + 1))
            v_w_hat = v_w/(1 - math.pow(beta2, i + 1))
            v_b_hat = v_b/(1 - math.pow(beta2, i + 1))
            self.w -= (lr/np.sqrt(v_w_hat + eps))*m_w_hat
            self.b -= (lr/np.sqrt(v_b_hat + eps))*m_b_hat
            self.append_param_history()

    def append_param_history(self):
        """Append the current ``w``, ``b`` and total loss on the stored data to the histories."""
        self.w_h.append(self.w)
        self.b_h.append(self.b)
        self.e_h.append(self.loss(self.X, self.Y))

# 误差曲线

# SGD: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=SGD_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in SGD")
plt.show()

SGD

# SGD parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = SGD_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(SGD_optimizer['name'], SGD_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

SGD动图

# MiniBatch: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=MiniBatch_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in MiniBatch")
plt.show()

MiniBatch

# MiniBatch parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = MiniBatch_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(MiniBatch_optimizer['name'], MiniBatch_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

MiniBatch动图

# Momentum: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=Momentum_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in Momentum")
plt.show()

Momentum

# Momentum parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = Momentum_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(Momentum_optimizer['name'], Momentum_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

Momentum动图

# NAG: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=NAG_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in NAG")
plt.show()

NAG

# NAG parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = NAG_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(NAG_optimizer['name'], NAG_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

NAG动图

# AdaGrad: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=AdaGrad_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in AdaGrad")
plt.show()

AdaGrad

# AdaGrad parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = AdaGrad_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(AdaGrad_optimizer['name'], AdaGrad_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

AdaGrad动图

# RMSProp: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=RMSProp_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in RMSProp")
plt.show()

RMSProp

# RMSProp parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = RMSProp_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(RMSProp_optimizer['name'], RMSProp_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

RMSProp动图

# Adam: train, then plot the error, weight and bias histories per epoch.
Exp = Tester(optimizer=Adam_optimizer)
# The bare "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and is rejected by newer releases; fall back for older ones.
try:
    plt.style.use("seaborn-v0_8")
except OSError:
    plt.style.use("seaborn")
Exp.fit(X, Y)
plt.plot(Exp.e_h, 'r')
plt.plot(Exp.w_h, 'b')
plt.plot(Exp.b_h, 'g')
plt.legend(["Error","Weight","Bias"])
plt.title("Parameters and Error in Adam")
plt.show()

Adam

# Adam parameter visualization: animate the (w, b) path over the loss contours.
W = np.linspace(w_min, w_max, 256)
b = np.linspace(b_min, b_max, 256)
WW, BB = np.meshgrid(W, b)
# Total loss evaluated at every (w, b) grid point.
Z = Exp.loss(X, Y, WW, BB)


def plot_animate_2d(i):
    """Redraw the path up to the epoch that frame ``i`` maps to; return the updated artists."""
    # Spread the animation frames evenly over the recorded epochs.
    epochs_per_frame = Adam_optimizer['epoch'] / animation_frames
    epoch = int(i * epochs_per_frame)
    upto = epoch + 1
    line.set_data(Exp.w_h[:upto], Exp.b_h[:upto])
    title.set_text('Epoch: {: d}, Error: {:.4f}'.format(epoch, Exp.e_h[epoch]))
    return line, title


fig = plt.figure(dpi=100)
ax = plt.subplot(111)
ax.set_xlim(w_min - 1, w_max + 1)
ax.set_ylim(b_min - 1, b_max + 1)
ax.set_xlabel('w')
ax.set_ylabel('b')
title = ax.set_title('Epoch 0')
cset = plt.contourf(WW, BB, Z, 25, alpha=0.6, cmap=cm.bwr)
i = 0
# Start the path with only the first recorded point.
(line,) = ax.plot(Exp.w_h[:i+1], Exp.b_h[:i+1], color='black', marker='.')
anim = animation.FuncAnimation(fig, func=plot_animate_2d, frames=animation_frames)
rc('animation', html='jshtml')
print('Optimizer = {}, Lr = {}'.format(Adam_optimizer['name'], Adam_optimizer['lr']))
# Bare expression so a notebook renders the animation as the cell output.
anim

Adam动图

全部完成。每个优化器除了误差曲线之外,还生成了参数的动态路径图。

更新于 阅读次数

请我喝[茶]~( ̄▽ ̄)~*