# Perceptron Model

## Mathematical Description

$$\hat{y} = f \left( \boldsymbol{w}^T\boldsymbol{x} + b \right)$$

$$\boldsymbol{w} = \begin{bmatrix}w_1 \\ w_2 \\ \vdots \\ w_n\end{bmatrix}\in\mathbb{R}^n,\qquad \boldsymbol{x} = \begin{bmatrix}x_1 \\ x_2 \\ \vdots \\ x_n\end{bmatrix}\in\mathbb{R}^n,\qquad b \in\mathbb{R}$$

$$f(z) = \begin{cases} 1 & z \geq 0 \\ 0 & \text{otherwise} \end{cases}$$
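As a quick sanity check, with illustrative values (not derived anywhere in this note) $n = 2$, $\boldsymbol{w} = [0.5, 0.5]^T$, $b = -0.7$:

$$\begin{gather} \boldsymbol{x} = [1,1]^T:\quad \hat{y} = f(0.5 + 0.5 - 0.7) = f(0.3) = 1 \\ \boldsymbol{x} = [1,0]^T:\quad \hat{y} = f(0.5 + 0.0 - 0.7) = f(-0.2) = 0 \end{gather}$$

so this particular perceptron already behaves like the AND function that we will train for at the end of this note.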

## Training Dataset

$$T = \left\{ (\boldsymbol{x}^1,y^1),(\boldsymbol{x}^2,y^2),\cdots,(\boldsymbol{x}^N,y^N)\right\},\qquad \boldsymbol{x}^i\in\mathbb{R}^n,\quad y^i\in\{0,1\}$$

## Loss Function

- **For a single sample $(\boldsymbol{x}^i, y^i) \in T$**

$$\begin{gather} {\rm Error}^i = \frac{1}{2}(y^i-\hat{y}^i)^2,\qquad i = 1,2,\cdots,N \\ \hat{y}^i = f(\boldsymbol{w}^T\boldsymbol{x}^i+b),\qquad i=1,2,\cdots,N \end{gather}$$

- **For the entire dataset $T$**

$$\begin{gather} {\rm Error} = \frac{1}{2}\sum_{i=1}^N (y^i-\hat{y}^i)^2 \\ \hat{y}^i = f \left( \boldsymbol{w}^T\boldsymbol{x}^i + b \right),\qquad i=1,2,\cdots,N \end{gather}$$
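A minimal Python sketch of this loss (the helper names `predict` and `total_error` are my own, not from the implementation below):

```python
def predict(w, b, x):
    # f(w^T x + b) with the step activation defined above (f(z) = 1 for z >= 0)
    z = sum(wj * xj for wj, xj in zip(w, x)) + b
    return 1.0 if z >= 0 else 0.0

def total_error(w, b, samples):
    # Squared-error loss over a list of (x, y) pairs:
    # Error = 1/2 * sum_i (y^i - y_hat^i)^2
    return 0.5 * sum((y - predict(w, b, x)) ** 2 for x, y in samples)

# e.g. total_error([0.5, 0.5], -0.7, [([1, 1], 1), ([1, 0], 0)])  # == 0.0
```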

## Parameter Training

### Stochastic Gradient Descent

- **Randomly select a sample $(\boldsymbol{x}^i, y^i) \in T$**

$$\boldsymbol{x}^i = [x_1^i,x_2^i,\cdots,x_n^i]^T$$

$$\begin{split} \frac{\partial {\rm Error}^i}{\partial w_j} &= \frac{\partial {\rm Error}^i}{\partial \hat{y}^i}\frac{\partial \hat{y}^i}{\partial w_j} = -(y^i-\hat{y}^i)x_j^i \\ \frac{\partial {\rm Error}^i}{\partial b} &= \frac{\partial {\rm Error}^i}{\partial \hat{y}^i}\frac{\partial \hat{y}^i}{\partial b} = -(y^i-\hat{y}^i) \end{split}$$

(Strictly speaking the step function $f$ is not differentiable; these derivatives treat $f$ as the identity, which yields the classic perceptron learning rule.)

Update the parameters as follows, where $\eta$ is the step size (learning rate):

$$\begin{split} w_j &\leftarrow w_j + \eta(y^i - \hat{y}^i)x_j^i,\qquad j = 1,2,\cdots,n \\ b &\leftarrow b + \eta(y^i-\hat{y}^i) \end{split}$$
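A minimal sketch of a single stochastic update, reusing the `predict` helper from the loss sketch above (`sgd_step` is my own name):

```python
import random

def sgd_step(w, b, samples, eta):
    # Randomly select one sample (x^i, y^i) from T
    x, y = random.choice(samples)
    # w_j <- w_j + eta*(y - y_hat)*x_j,  b <- b + eta*(y - y_hat)
    delta = y - predict(w, b, x)
    w = [wj + eta * delta * xj for wj, xj in zip(w, x)]
    return w, b + eta * delta
```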

### Batch Gradient Descent

- **For the entire dataset $T$**

$$\begin{split} \frac{\partial {\rm Error}}{\partial w_j} &= \sum_{i=1}^N\frac{\partial {\rm Error}^i}{\partial \hat{y}^i}\frac{\partial \hat{y}^i}{\partial w_j} = -\sum_{i=1}^N(y^i-\hat{y}^i)x_j^i, \qquad j = 1,2,\cdots,n \\ \frac{\partial {\rm Error}}{\partial b} &= \sum_{i=1}^N\frac{\partial {\rm Error}^i}{\partial \hat{y}^i}\frac{\partial \hat{y}^i}{\partial b} = -\sum_{i=1}^N(y^i-\hat{y}^i) \end{split}$$

Update the parameters as follows, where $\eta$ is the step size (learning rate):

$$\begin{split} w_j &\leftarrow w_j + \eta\sum_{i=1}^N(y^i - \hat{y}^i)x^i_j,\qquad j = 1,2,\cdots,n \\ b &\leftarrow b + \eta\sum_{i=1}^N(y^i-\hat{y}^i) \end{split}$$
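The corresponding full-batch update, as a sketch under the same assumptions (`batch_step` is my own name): the gradient is accumulated over all $N$ samples before the parameters change once.

```python
def batch_step(w, b, samples, eta):
    grad_w = [0.0] * len(w)
    grad_b = 0.0
    # Accumulate (y^i - y_hat^i) contributions over the whole dataset T
    for x, y in samples:
        delta = y - predict(w, b, x)
        grad_w = [g + delta * xj for g, xj in zip(grad_w, x)]
        grad_b += delta
    # One parameter update per pass over T
    w = [wj + eta * g for wj, g in zip(w, grad_w)]
    return w, b + eta * grad_b
```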

## Python 3 Implementation

Here we build a perceptron and train it on the truth table of the AND function:

$$T = \left\{ \left(\begin{bmatrix}1\\1\end{bmatrix},1\right), \left(\begin{bmatrix}1\\0\end{bmatrix},0\right), \left(\begin{bmatrix}0\\1\end{bmatrix},0\right), \left(\begin{bmatrix}0\\0\end{bmatrix},0\right)\right\}$$

```python
from functools import reduce

class Perceptron(object):
    def __init__(self, input_num, activator):
        # Initialize the perceptron: activation function, zero weights, zero bias
        self.activator = activator
        # Equivalent to: self.weights = [0.0] * input_num
        self.weights = [0.0 for _ in range(input_num)]
        # Bias term b
        self.bias = 0.0

    def __str__(self):
        # Print the weights and the bias term
        return 'weights:\t %s\n bias:\t %f\n' % (self.weights, self.bias)

    def predict(self, input_vec):
        # Given an input vector, return the perceptron's output:
        # y = f(sum(vec * weight) + bias)
        y = self.activator(
            # reduce with a lambda sums the element-wise products
            reduce(lambda a, b: a + b,
                   # map with a two-argument lambda walks [v1,...,vn] and
                   # [w1,...,wn] in lockstep and yields [v1*w1,...,vn*wn]
                   list(
                       map(
                           lambda x, w: x * w,
                           input_vec, self.weights
                       )
                   )
                   ) + self.bias
        )
        return y

    def train(self, input_vecs, labels, iteration, rate):
        '''
        Train the perceptron parameters by iterating over the data.
        input_vecs : input vectors
        labels     : labels corresponding to the input vectors
        iteration  : number of training iterations
        rate       : learning rate
        '''
        for i in range(iteration):
            self._one_iteration(input_vecs, labels, rate)

    def _one_iteration(self, input_vecs, labels, rate):
        '''
        One pass over the training data.
        '''
        samples = zip(input_vecs, labels)
        for (input_vec, label) in samples:
            # Compute the perceptron's output under the current weights
            output = self.predict(input_vec)
            self._update_weights(input_vec, output, label, rate)

    def _update_weights(self, input_vec, output, label, rate):
        '''
        Update the weights.
        '''
        # wi <-- wi + Δwi, where Δwi = η*(t-y)*xi
        delta = label - output
        self.weights = list(map(
            lambda x, w: w + rate * delta * x,
            input_vec, self.weights
        ))
        # b <-- b + Δb, where Δb = η*(t-y)
        self.bias = self.bias + rate * delta

def f(x):
    '''
    Step activation function. Note the strict inequality: f(0) = 0 here,
    whereas the mathematical definition above uses z >= 0.
    '''
    return 1.0 if x > 0 else 0.0

def get_training_dataset():
    '''
    Build the training data from the AND truth table.
    '''
    input_vecs = [[1, 1], [0, 0], [1, 0], [0, 1]]
    labels = [1, 0, 0, 0]
    return input_vecs, labels

def train_and_perceptron():
    '''
    Train a perceptron on the AND truth table.
    '''
    # Get the training data
    input_vecs, labels = get_training_dataset()
    # Create a perceptron with input_num = 2 and activator = f
    p = Perceptron(2, f)
    # Train the perceptron: 10 iterations, learning rate 0.1
    p.train(input_vecs, labels, 10, 0.1)
    # Return the trained perceptron
    return p

if __name__ == '__main__':
    # Train the AND perceptron
    and_perceptron = train_and_perceptron()
    # Print the learned weights and bias
    print(and_perceptron)
    # Test on all four inputs
    print('1 and 1 = %d' % and_perceptron.predict([1, 1]))
    print('0 and 0 = %d' % and_perceptron.predict([0, 0]))
    print('1 and 0 = %d' % and_perceptron.predict([1, 0]))
    print('0 and 1 = %d' % and_perceptron.predict([0, 1]))
```

### Output

```
weights:	 [0.1, 0.2]
 bias:	 -0.200000

1 and 1 = 1
0 and 0 = 0
1 and 0 = 0
0 and 1 = 0
```
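As a check, the learned parameters define the decision boundary $0.1x_1 + 0.2x_2 - 0.2 = 0$: the input $[1,1]^T$ gives $0.1 > 0$ and is classified as $1$, while $[1,0]^T$, $[0,1]^T$ and $[0,0]^T$ give $-0.1$, $0$ and $-0.2$, all of which the strict-inequality activation in the code maps to $0$, exactly reproducing the AND truth table.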