分类器
问题
对于\(n\)个训练样本\(X=\{x_1,x_2,\dots,x_n\}\)(\(n\)个\(d\)维向量)及每个样本所属的类别(\(w_1\)或\(w_2\)),求\(d+1\)维权重向量\(w\),使得对每个增广后的样本\(x\),\(\operatorname{sgn}(w^Tx)\)与其对应类别相同(\(w_1\)类为正,\(w_2\)类为负)。
数据预处理
将每个训练样本增加一个分量\(1\),并将类别为\(w_2\)的样本的每个分量乘\(-1\)
感知器准则
1.初始化权重\(w\)为任意值,选取步长\(c\)(\(0<c\le 1\))
2.循环遍历每一个样本\(x_k\),求出\(g(x_k)=w^T(k)*x_k\)
3.若\(g(x_k)>0\),则\(w\)不变;否则\(w\leftarrow w+c\,x_k\)(\(w_2\)类样本已在预处理中乘以\(-1\),因此错分时统一做加法修正)
4.直到连续\(n\)次\(g(x_k)>0\),则退出循环,此时的\(w\)即为所求。
点击查看代码
import numpy as np
# Module-level state shared by init(), train() and test() of the perceptron script.
c = 1.0                   # step size used for every weight correction in train()
w = np.ones(28 * 28 + 1)  # weight vector: 784 pixel weights plus one bias weight
img, img2 = [], []        # training / test samples; init() replaces both with arrays
label = []                # not referenced by the functions visible in this listing
def _load_signed_samples(image_path, label_path):
    """Read one IDX image/label file pair and return normalized 5-vs-8 samples.

    Only the digits 5 and 8 are kept. Every kept image becomes a row of its
    784 raw pixel bytes (0-255) followed by a constant 1, and rows labelled 8
    are multiplied by -1 so that a separating weight vector gives a positive
    dot product with every row.

    Returns:
        An int64 array of shape (kept_samples, 785).
    """
    with open(image_path, 'rb') as f:
        raw_images = f.read()
    with open(label_path, 'rb') as f:
        raw_labels = f.read()
    # Same offsets as before: labels start after 8 header bytes, pixels after 16.
    labels = np.frombuffer(raw_labels, dtype=np.uint8, offset=8)
    pixels = np.frombuffer(raw_images, dtype=np.uint8, offset=16)
    # Use only complete (label, image) pairs instead of a hard-coded sample count.
    count = min(len(labels), len(pixels) // 784)
    labels = labels[:count]
    pixels = pixels[:count * 784].reshape(count, 784)
    keep = (labels == 5) | (labels == 8)
    # Signed dtype: uint8 could not hold the negated rows.
    samples = np.ones((int(keep.sum()), 785), dtype=np.int64)
    samples[:, :784] = pixels[keep]
    samples[labels[keep] == 8] *= -1
    return samples


def init(data_dir=r'C:\Users\Administrator\Desktop\模式识别\分类器作业'):
    """Load the training and test sets into the globals ``img`` and ``img2``.

    Fixes the pixel decoding of the previous version, which converted each
    byte with ``int(str(item).encode('UTF-8'), 16)`` and therefore read the
    decimal text of the byte as hexadecimal (255 became 0x255 == 597).
    Pixels are now the raw byte values.

    Args:
        data_dir: Directory containing ``train-images.idx3-ubyte``,
            ``train-labels.idx1-ubyte``, ``t10k-images.idx3-ubyte`` and
            ``t10k-labels.idx1-ubyte``. Defaults to the original location.

    Raises:
        OSError: If one of the four files cannot be opened.
    """
    import os  # local import so this listing needs no extra top-level import

    global img, img2
    img = _load_signed_samples(
        os.path.join(data_dir, 'train-images.idx3-ubyte'),
        os.path.join(data_dir, 'train-labels.idx1-ubyte'),
    )
    img2 = _load_signed_samples(
        os.path.join(data_dir, 't10k-images.idx3-ubyte'),
        os.path.join(data_dir, 't10k-labels.idx1-ubyte'),
    )
def train(max_iter=100000):
    """Fit the global weight vector ``w`` with the fixed-increment perceptron rule.

    The rows of the global ``img`` are visited cyclically. A row whose dot
    product with ``w`` is not strictly positive triggers ``w += c * row``.
    Training stops once ``len(img)`` consecutive rows are classified
    correctly, or after ``max_iter`` row visits, whichever comes first.

    Args:
        max_iter: Upper bound on the number of row visits (default 100000,
            the value that used to be hard-coded).
    """
    global w
    n = len(img)
    if n == 0:
        # Nothing to learn from; previously this crashed on img[0].
        return
    streak = 0  # consecutive correctly classified rows
    for step in range(max_iter):
        sample = img[step % n]
        if w.dot(sample) <= 0:
            w += c * sample
            streak = 0
        else:
            streak += 1
            if streak == n:
                # One full error-free pass: w separates the training set.
                break
def test():
    """Print the percentage of rows in ``img2`` whose dot product with ``w`` is positive."""
    total = len(img2)
    # Count rows that the current weights put on the positive side.
    hits = sum(1 for sample in img2 if w.dot(sample) > 0)
    print("The accuracy is %.2f" % (hits * 100 / total))
# Script entry: load the data, fit the perceptron weights, then print the test accuracy.
init()
train()
test()
最小平方误差准则(MSE)
1.初始化\(n\)维向量\(b\)(每个分量为任意正值),选取步长\(c\)(\(0<c\le 1\))
2.算出\(w=A*b\)及\(e=X*w-b\),其中\(A=X^{+}\)为样本矩阵\(X\)的伪逆
3.若\(e\)的每一个分量都为\(0\),则此时的\(w\)即为所求,退出循环;若\(e\)的每一个分量都为负,则判断无解,退出循环;否则使\(b\)加上\(c*(e+|e|)\),重复步骤2
点击查看代码
import numpy as np
# Module-level state shared by init(), train() and test() of the MSE script.
c = 1.0                   # step size applied to each correction of b in train()
w = np.ones(28 * 28 + 1)  # weight vector: 784 pixel weights plus one bias weight
b = np.ones(11272)        # margin vector; train() subtracts it from img.dot(w)
pd = 0                    # train() sets this to 1 when it decides there is no solution
img, img2 = [], []        # training / test samples; init() replaces both with arrays
#label = []
def _load_signed_samples(image_path, label_path):
    """Read one IDX image/label file pair and return normalized 5-vs-8 samples.

    Only the digits 5 and 8 are kept. Every kept image becomes a row of its
    784 raw pixel bytes (0-255) followed by a constant 1, and rows labelled 8
    are multiplied by -1 so that a separating weight vector gives a positive
    dot product with every row.

    Returns:
        An int64 array of shape (kept_samples, 785).
    """
    with open(image_path, 'rb') as f:
        raw_images = f.read()
    with open(label_path, 'rb') as f:
        raw_labels = f.read()
    # Same offsets as before: labels start after 8 header bytes, pixels after 16.
    labels = np.frombuffer(raw_labels, dtype=np.uint8, offset=8)
    pixels = np.frombuffer(raw_images, dtype=np.uint8, offset=16)
    # Use only complete (label, image) pairs instead of a hard-coded sample count.
    count = min(len(labels), len(pixels) // 784)
    labels = labels[:count]
    pixels = pixels[:count * 784].reshape(count, 784)
    keep = (labels == 5) | (labels == 8)
    # Signed dtype: uint8 could not hold the negated rows.
    samples = np.ones((int(keep.sum()), 785), dtype=np.int64)
    samples[:, :784] = pixels[keep]
    samples[labels[keep] == 8] *= -1
    return samples


def init(data_dir=r'C:\Users\Administrator\Desktop\模式识别\分类器作业'):
    """Load the training and test sets into the globals ``img`` and ``img2``.

    Fixes the pixel decoding of the previous version, which converted each
    byte with ``int(str(item).encode('UTF-8'), 16)`` and therefore read the
    decimal text of the byte as hexadecimal (255 became 0x255 == 597).
    Pixels are now the raw byte values.

    Args:
        data_dir: Directory containing ``train-images.idx3-ubyte``,
            ``train-labels.idx1-ubyte``, ``t10k-images.idx3-ubyte`` and
            ``t10k-labels.idx1-ubyte``. Defaults to the original location.

    Raises:
        OSError: If one of the four files cannot be opened.
    """
    import os  # local import so this listing needs no extra top-level import

    global img, img2
    img = _load_signed_samples(
        os.path.join(data_dir, 'train-images.idx3-ubyte'),
        os.path.join(data_dir, 'train-labels.idx1-ubyte'),
    )
    img2 = _load_signed_samples(
        os.path.join(data_dir, 't10k-images.idx3-ubyte'),
        os.path.join(data_dir, 't10k-labels.idx1-ubyte'),
    )
def train():
    """Fit the global ``w`` by iterating on the margin vector ``b``.

    Each of at most 100 rounds computes ``w = pinv(img) . b`` and the error
    ``e = img . w - b``, then:

    * stops if every component of ``e`` is exactly zero;
    * sets the global ``pd`` to 1 and stops if every component is negative
      (treated as "no solution" by the caller);
    * otherwise adds ``c * (e + |e|)`` to ``b``.

    The previous version looped over ``range(0, 785)`` (the feature count)
    when inspecting and rectifying ``e``, although ``e`` has one entry per
    training sample. Samples beyond index 784 were never checked, and their
    raw, possibly negative, error was added to ``b``. All components are
    handled now.

    NOTE: the zero test is an exact floating-point comparison, as before, so
    in practice the loop usually runs all 100 rounds.
    """
    global w, b, pd
    n = len(img)
    if n == 0:
        # No training samples: leave w, b and pd untouched.
        return
    if b.shape[0] != n:
        # b must have one entry per sample; restart from all ones if the
        # module-level default does not match the loaded data.
        b = np.ones(n)
    A = np.linalg.pinv(img)
    for _ in range(100):
        w = np.dot(A, b)
        e = np.dot(img, w) - b
        if not np.any(e != 0):
            break
        if np.all(e < 0):
            pd = 1
            break
        # e + |e| is 2*e where e > 0 and exactly 0 elsewhere, so b never shrinks.
        b += c * (e + np.abs(e))
def test():
    """Print the percentage of rows in ``img2`` whose dot product with ``w`` is positive."""
    total = len(img2)
    # Count rows that the current weights put on the positive side.
    hits = sum(1 for sample in img2 if w.dot(sample) > 0)
    print("The accuracy is %.2f" % (hits * 100 / total))
# Script entry: load the data and fit the weights; evaluate only when train()
# did not raise the "no solution" flag pd.
init()
train()
if not pd:
    test()
else:
    print("No Solution")
Fisher 准则
点击查看代码
import numpy as np
# Module-level state shared by init(), train() and test() of the Fisher script.
w = np.zeros(28 * 28)  # projection direction, one weight per pixel
w0 = 0                 # scalar that test() compares projections against
X1, X2 = [], []        # training images of digit 5 / digit 8, filled by init()
T1, T2 = [], []        # evaluation sets iterated by test()
def _load_class_split(image_path, label_path):
    """Read one IDX image/label file pair and split the 5s from the 8s.

    Returns:
        A pair ``(fives, eights)`` of int64 arrays, each of shape
        (samples_of_that_digit, 784), holding raw pixel bytes (0-255).
    """
    with open(image_path, 'rb') as f:
        raw_images = f.read()
    with open(label_path, 'rb') as f:
        raw_labels = f.read()
    # Same offsets as before: labels start after 8 header bytes, pixels after 16.
    labels = np.frombuffer(raw_labels, dtype=np.uint8, offset=8)
    pixels = np.frombuffer(raw_images, dtype=np.uint8, offset=16)
    # Use only complete (label, image) pairs instead of a hard-coded sample count.
    count = min(len(labels), len(pixels) // 784)
    labels = labels[:count]
    pixels = pixels[:count * 784].reshape(count, 784)
    # Select first, then widen, so only the kept rows are copied as int64.
    fives = pixels[labels == 5].astype(np.int64)
    eights = pixels[labels == 8].astype(np.int64)
    return fives, eights


def init(data_dir=r'C:\Users\Administrator\Desktop\模式识别\分类器作业'):
    """Load digits 5 and 8 into the globals ``X1``/``X2`` (train) and ``T1``/``T2`` (test).

    Two defects of the previous version are fixed:

    * ``T1`` and ``T2`` were built from ``X1`` and ``X2``, so the test set was
      silently replaced by the training set. They now come from the t10k files.
    * Pixels were converted with ``int(str(item).encode('UTF-8'), 16)``, which
      read the decimal text of each byte as hexadecimal (255 became 597).
      Pixels are now the raw byte values.

    Args:
        data_dir: Directory containing ``train-images.idx3-ubyte``,
            ``train-labels.idx1-ubyte``, ``t10k-images.idx3-ubyte`` and
            ``t10k-labels.idx1-ubyte``. Defaults to the original location.

    Raises:
        OSError: If one of the four files cannot be opened.
    """
    import os  # local import so this listing needs no extra top-level import

    global X1, X2, T1, T2
    X1, X2 = _load_class_split(
        os.path.join(data_dir, 'train-images.idx3-ubyte'),
        os.path.join(data_dir, 'train-labels.idx1-ubyte'),
    )
    T1, T2 = _load_class_split(
        os.path.join(data_dir, 't10k-images.idx3-ubyte'),
        os.path.join(data_dir, 't10k-labels.idx1-ubyte'),
    )
def train():
    """Compute the Fisher projection ``w`` and decision threshold ``w0``.

    ``w`` is ``pinv(Sw) . (m1 - m2)``, where ``m1`` and ``m2`` are the class
    means of the globals ``X1`` and ``X2`` and ``Sw`` is the sum of the two
    within-class scatter matrices.

    ``w0`` is stored as the midpoint of the two projected class means,
    ``0.5 * (w.m1 + w.m2)``, because ``test()`` compares ``np.dot(x, w)``
    directly against ``w0``. The previous version stored the negative of
    that value, which put the threshold on the wrong side of the projected
    means.

    The feature dimension is taken from the data instead of being fixed at
    784, and the per-sample Python loops are replaced by matrix products.

    Raises:
        ValueError: If either class has no samples (e.g. init() was not run).
    """
    global w, w0
    class1 = np.asarray(X1, dtype=float)
    class2 = np.asarray(X2, dtype=float)
    if class1.ndim != 2 or class2.ndim != 2 or len(class1) == 0 or len(class2) == 0:
        raise ValueError("train() needs at least one sample of each class")
    m1 = class1.mean(axis=0)
    m2 = class2.mean(axis=0)
    # Within-class scatter: sum over samples of (x - m)(x - m)^T for each class.
    dev1 = class1 - m1
    dev2 = class2 - m2
    scatter = dev1.T.dot(dev1) + dev2.T.dot(dev2)
    # Pseudo-inverse, as before: the scatter matrix can be singular.
    w = np.dot(np.linalg.pinv(scatter), m1 - m2)
    w0 = 0.5 * (np.dot(w, m1) + np.dot(w, m2))
def test():
global T1,T2
#print(w0)
n1=len(T1)
n2=len(T2)
s=0
for i in range(0, n1):
if np.dot(T1[i],w)>w0: s+=1
#print(np.dot(T1[i],w))
for i in range(0, n2):
if np.dot(T2[i],w)