分类器


问题

对于\(n\)个训练样本\(X=\{x_1,x_2,...,x_n\}\)(\(n\)个\(d\)维向量)及每个样本所属的类别\((w_1,w_2)\),求\(d+1\)维权重向量\(w\),使得\(sgn(x*w)\)与其对应类别相同。

数据预处理

将每个训练样本增加一个分量\(1\),并将类别为\(w_2\)的样本的每个分量乘\(-1\)

感知器准则

1.初始化权重\(w\)为任意值,选取步长\(c\ (0<c\le 1)\)
2.循环遍历每一个样本\(x_k\),求出\(g(x_k)=w^T(k)*x_k\)
3.若\(g(x_k)>0\),则\(w\)不变;否则\(w+=x_k*c\)
4.直到连续\(n\)次\(g(x_k)>0\),则退出循环,此时的\(w\)即为所求。

点击查看代码
import numpy as np
# Step size applied by train() when it corrects the weight vector.
c = 1.0
# Weight vector: 784 pixel weights plus one weight for the constant 1 that
# init() appends to every sample.
w = np.ones(785)
# Training samples; init() fills this and rebinds it to a NumPy array.
img = []
# Test samples; init() fills this and rebinds it to a NumPy array.
img2 = []
# Not referenced by any function visible in this script.
label = []
def init():
    """Load the digit-5 and digit-8 images into the globals ``img`` and ``img2``.

    Reads the four IDX files (training/test images and labels) from the
    hard-coded paths below.  Every kept image becomes one row of 784 raw
    pixel values (0-255) followed by a constant 1; rows whose label is 8
    are negated, so a weight vector ``w`` classifies a row correctly exactly
    when ``w.dot(row) > 0``.

    ``img`` receives the rows taken from the first 60000 training records and
    ``img2`` the rows taken from the first 10000 test records, both as
    ``int64`` arrays.  The globals are rebound, so calling ``init()`` more
    than once is safe.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file holds fewer records than expected.
    """
    global img, img2

    def read_bytes(path):
        with open(path, 'rb') as f:
            return f.read()

    def build_samples(images, labels, count):
        # Labels start after an 8-byte header, one byte per record; images
        # start after a 16-byte header, 784 bytes per record.
        digits = np.frombuffer(labels, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(images, dtype=np.uint8, count=count * 784, offset=16)
        pixels = pixels.reshape(count, 784)

        keep = (digits == 5) | (digits == 8)
        kept_digits = digits[keep]
        # Each byte already IS the pixel value; no text/hex round trip.
        # Widen to a signed type before negating: uint8 would wrap around.
        kept_pixels = pixels[keep].astype(np.int64)
        bias = np.ones((kept_pixels.shape[0], 1), dtype=np.int64)
        samples = np.hstack([kept_pixels, bias])
        samples[kept_digits == 8] *= -1
        return samples

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-labels.idx1-ubyte')

    img = build_samples(file, lab, 60000)
    img2 = build_samples(file2, lab2, 10000)

def train():
    """Fit the global weight vector ``w`` with the fixed-increment perceptron rule.

    Cycles through the rows of the global ``img``.  A row whose product with
    ``w`` is not strictly positive is added to ``w`` scaled by the global
    step ``c``.  Training stops as soon as ``len(img)`` consecutive rows are
    classified correctly, or after 100000 row visits, whichever comes first.
    ``w`` is updated in place.
    """
    global w
    sample_count = len(img)
    index = 0
    streak = 0          # consecutive rows with w.dot(row) > 0
    visits_left = 100000
    while visits_left > 0:
        visits_left -= 1
        sample = img[index]
        if w.dot(sample) > 0:
            streak += 1
            if streak == sample_count:
                # One full error-free pass over the data: converged.
                break
        else:
            streak = 0
            w += c * sample
        index = (index + 1) % sample_count

def test():
    """Print the share of rows in the global ``img2`` that ``w`` classifies correctly.

    A row counts as correct when its product with the global ``w`` is
    strictly positive.  The result is printed as a percentage with two
    decimals.
    """
    total = len(img2)
    correct = sum(1 for sample in img2 if w.dot(sample) > 0)
    print("The accuracy is %.2f" % (correct * 100 / total))


# Script driver: load the data, fit the weights, then print the accuracy.
init()
train()
test()

最小平方误差准则(MSE)

1.初始化\(n\)维向量\(b\)(每个分量为任意正值),选取步长\(c\ (0<c\le 1)\),并算出\(n*(d+1)\)的矩阵\(X\)的伪逆矩阵\(A\)
2.算出\(w=A*b\)和\(e=X*w-b\)
3.若\(e\)的每一个分量都为\(0\),则此时的\(w\)即为所求,退出循环;若\(e\)的每一个分量都为负,则判断无解,退出循环;否则使\(b\)加上\(c*(e+|e|)\),重复步骤2

点击查看代码
import numpy as np
# Step size applied by train() when it enlarges the margin vector b.
c=1.0
# Weight vector: 784 pixel weights plus one weight for the constant 1 that
# init() appends to every sample.
w=np.ones(785)
# Margin vector; train() computes np.dot(img, w) - b, so its length has to
# equal len(img).  11272 is presumably the number of training images labelled
# 5 or 8 -- TODO confirm against the data set in use.
b=np.ones(11272)
# Set to 1 by train() when it decides that no separating solution exists.
pd=0
# Training samples; init() fills this and rebinds it to a NumPy array.
img=[]
# Test samples; init() fills this and rebinds it to a NumPy array.
img2=[]
#label = []
def init():
    """Load the digit-5 and digit-8 images into the globals ``img`` and ``img2``.

    Reads the four IDX files (training/test images and labels) from the
    hard-coded paths below.  Every kept image becomes one row of 784 raw
    pixel values (0-255) followed by a constant 1; rows whose label is 8
    are negated, so a weight vector ``w`` classifies a row correctly exactly
    when ``w.dot(row) > 0``.

    ``img`` receives the rows taken from the first 60000 training records and
    ``img2`` the rows taken from the first 10000 test records, both as
    ``int64`` arrays.  The globals are rebound, so calling ``init()`` more
    than once is safe.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file holds fewer records than expected.
    """
    global img, img2

    def read_bytes(path):
        with open(path, 'rb') as f:
            return f.read()

    def build_samples(images, labels, count):
        # Labels start after an 8-byte header, one byte per record; images
        # start after a 16-byte header, 784 bytes per record.
        digits = np.frombuffer(labels, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(images, dtype=np.uint8, count=count * 784, offset=16)
        pixels = pixels.reshape(count, 784)

        keep = (digits == 5) | (digits == 8)
        kept_digits = digits[keep]
        # Each byte already IS the pixel value; no text/hex round trip.
        # Widen to a signed type before negating: uint8 would wrap around.
        kept_pixels = pixels[keep].astype(np.int64)
        bias = np.ones((kept_pixels.shape[0], 1), dtype=np.int64)
        samples = np.hstack([kept_pixels, bias])
        samples[kept_digits == 8] *= -1
        return samples

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-labels.idx1-ubyte')

    img = build_samples(file, lab, 60000)
    img2 = build_samples(file2, lab2, 10000)

def train():
    """Fit the global ``w`` with the iterative minimum-squared-error procedure.

    Uses the globals ``img`` (one normalised sample per row), ``b`` (margin
    vector with one entry per row of ``img``) and ``c`` (step size).  Each of
    at most 100 rounds computes ``w = pinv(img) @ b`` and the error
    ``e = img @ w - b``, then:

    * stops if every component of ``e`` is exactly zero (``w`` solves the
      system);
    * sets the global ``pd`` to 1 and stops if every component of ``e`` is
      negative (treated as "no solution");
    * otherwise enlarges ``b`` by ``c * (e + |e|)`` and repeats.

    The checks and the update cover ALL components of ``e``.  ``e`` has one
    entry per sample, not one per feature, so it must not be scanned with the
    feature count.  Because ``e + |e|`` is never negative, ``b`` can only
    grow.  ``b`` is updated in place.
    """
    global w, b, pd
    A = np.linalg.pinv(img)
    for _ in range(100):
        w = np.dot(A, b)
        e = np.dot(img, w) - b
        if not np.any(e != 0):
            break
        if np.all(e < 0):
            pd = 1
            break
        # Only the positive part of the error is fed back into the margins.
        b += c * (e + np.abs(e))

def test():
    """Print the share of rows in the global ``img2`` that ``w`` classifies correctly.

    A row counts as correct when its product with the global ``w`` is
    strictly positive.  The result is printed as a percentage with two
    decimals.
    """
    total = len(img2)
    correct = sum(1 for sample in img2 if w.dot(sample) > 0)
    print("The accuracy is %.2f" % (correct * 100 / total))


# Script driver: load the data and fit the weights.
init()
train()
# train() sets pd to 1 when it gives up; only report accuracy otherwise.
if pd :
    print("No Solution")
else:
    test()

Fisher 准则

点击查看代码
import numpy as np
# Projection direction, one weight per pixel; replaced by train().
w=np.zeros(784)
# Scalar computed by train() and compared with np.dot(sample, w) in test().
w0=0
# Training images labelled 5; init() fills this and rebinds it to an array.
X1=[]
# Training images labelled 8; init() fills this and rebinds it to an array.
X2=[]
# Test images labelled 5, filled by init().
T1=[]
# Test images labelled 8, filled by init().
T2=[]
def init():
    """Load the digit-5 and digit-8 images into ``X1``/``X2`` and ``T1``/``T2``.

    Reads the four IDX files (training/test images and labels) from the
    hard-coded paths below.  Every image is one row of 784 raw pixel values
    (0-255), stored as ``int64``.

    * ``X1`` / ``X2``: images labelled 5 / 8 among the first 60000 training
      records.
    * ``T1`` / ``T2``: images labelled 5 / 8 among the first 10000 test
      records.

    The globals are rebound, so calling ``init()`` more than once is safe.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file holds fewer records than expected.
    """
    global X1, X2, T1, T2

    def read_bytes(path):
        with open(path, 'rb') as f:
            return f.read()

    def split_by_digit(images, labels, count):
        # Labels start after an 8-byte header, one byte per record; images
        # start after a 16-byte header, 784 bytes per record.
        digits = np.frombuffer(labels, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(images, dtype=np.uint8, count=count * 784, offset=16)
        # Each byte already IS the pixel value; widen so later arithmetic
        # cannot wrap around in uint8.
        pixels = pixels.reshape(count, 784).astype(np.int64)
        return pixels[digits == 5], pixels[digits == 8]

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\分类器作业\t10k-labels.idx1-ubyte')

    X1, X2 = split_by_digit(file, lab, 60000)
    # The test sets come from the test files, not from the training arrays.
    T1, T2 = split_by_digit(file2, lab2, 10000)

def train():
    """Fit the Fisher linear discriminant on the globals ``X1`` and ``X2``.

    Sets two globals:

    * ``w``  -- projection direction ``pinv(Sw) @ (m1 - m2)``, where ``m1`` and
      ``m2`` are the class means and ``Sw`` is the sum of both within-class
      scatter matrices.
    * ``w0`` -- decision threshold on the projection: the midpoint
      ``0.5 * (w.m1 + w.m2)`` of the two projected class means.  A sample
      ``x`` falls on the ``X1`` side when ``np.dot(x, w) > w0``.

    The feature dimension is taken from the data instead of being fixed at
    784.

    Raises:
        ValueError: if either class has no training samples.
    """
    global X1, X2, w0, w
    class1 = np.asarray(X1, dtype=float)
    class2 = np.asarray(X2, dtype=float)
    if class1.size == 0 or class2.size == 0:
        raise ValueError("both classes need at least one training sample")

    m1 = class1.mean(axis=0)
    m2 = class2.mean(axis=0)

    # Sum of outer products of the centred rows, done as one matrix product
    # per class instead of one outer product per sample.
    centred1 = class1 - m1
    centred2 = class2 - m2
    scatter = centred1.T.dot(centred1) + centred2.T.dot(centred2)

    # pinv rather than inv: the scatter matrix can be singular (for example
    # when some feature is constant across all samples).
    w = np.dot(np.linalg.pinv(scatter), m1 - m2)

    # Stored as the threshold itself (positive midpoint), because the
    # classification step compares the projection directly against w0.
    w0 = 0.5 * (np.dot(w, m1) + np.dot(w, m2))


def test():
    global T1,T2
    #print(w0)
    n1=len(T1)
    n2=len(T2)
    s=0
    for i in range(0, n1):
        if np.dot(T1[i],w)>w0: s+=1
        #print(np.dot(T1[i],w))
    for i in range(0, n2):
        if np.dot(T2[i],w)
CS